diff --git a/doc/benchmarks.rst b/doc/benchmarks.rst index a5fce2d8..b8c330fd 100644 --- a/doc/benchmarks.rst +++ b/doc/benchmarks.rst @@ -174,6 +174,15 @@ Pseudo-code of the benchmark:: See the `Dulwich project `_. + +docutils +-------- + +Use Docutils_ to convert Docutils' documentation to HTML. +Representative of building a medium-sized documentation set. + +.. _Docutils: https://docutils.sourceforge.io/ + fannkuch -------- diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST index c8212d94..b8f2885a 100644 --- a/pyperformance/data-files/benchmarks/MANIFEST +++ b/pyperformance/data-files/benchmarks/MANIFEST @@ -15,6 +15,7 @@ deepcopy deltablue django_template dulwich_log +docutils fannkuch float genshi diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/publisher.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/publisher.txt new file mode 100644 index 00000000..d45bbb8d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/publisher.txt @@ -0,0 +1,352 @@ +======================== + The Docutils Publisher +======================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + + +The ``docutils.core.Publisher`` class is the core of Docutils, +managing all the processing and relationships between components. See +`PEP 258`_ for an overview of Docutils components. + +The ``docutils.core.publish_*`` convenience functions are the normal +entry points for using Docutils as a library. + +See `Inside A Docutils Command-Line Front-End Tool`_ for an overview +of a typical Docutils front-end tool, including how the Publisher +class is used. + +.. _PEP 258: ../peps/pep-0258.html +.. 
_Inside A Docutils Command-Line Front-End Tool: ../howto/cmdline-tool.html + + +Publisher Convenience Functions +=============================== + +Each of these functions sets up a ``docutils.core.Publisher`` object, +then calls its ``publish`` method. ``docutils.core.Publisher.publish`` +handles everything else. There are several convenience functions in +the ``docutils.core`` module: + +:_`publish_cmdline()`: for command-line front-end tools, like + ``rst2html.py``. There are several examples in the ``tools/`` + directory. A detailed analysis of one such tool is in `Inside A + Docutils Command-Line Front-End Tool`_. + +:_`publish_file()`: for programmatic use with file-like I/O. In + addition to writing the encoded output to a file, also returns the + encoded output as a string. + +:_`publish_string()`: for programmatic use with string I/O. Returns + the encoded output as a string. + +:_`publish_parts()`: for programmatic use with string input; returns a + dictionary of document parts. Dictionary keys are the names of + parts, and values are Unicode strings; encoding is up to the client. + Useful when only portions of the processed document are desired. + See `publish_parts() Details`_ below. + + There are usage examples in the `docutils/examples.py`_ module. + +:_`publish_doctree()`: for programmatic use with string input; returns a + Docutils document tree data structure (doctree). The doctree can be + modified, pickled & unpickled, etc., and then reprocessed with + `publish_from_doctree()`_. + +:_`publish_from_doctree()`: for programmatic use to render from an + existing document tree data structure (doctree); returns the encoded + output as a string. + +:_`publish_programmatically()`: for custom programmatic use. This + function implements common code and is used by ``publish_file``, + ``publish_string``, and ``publish_parts``. It returns a 2-tuple: + the encoded string output and the Publisher object. + +.. 
_Inside A Docutils Command-Line Front-End Tool: ../howto/cmdline-tool.html +.. _docutils/examples.py: ../../docutils/examples.py + + +Configuration +------------- + +To pass application-specific setting defaults to the Publisher +convenience functions, use the ``settings_overrides`` parameter. Pass +a dictionary of setting names & values, like this:: + + overrides = {'input_encoding': 'ascii', + 'output_encoding': 'latin-1'} + output = publish_string(..., settings_overrides=overrides) + +Settings from command-line options override configuration file +settings, and they override application defaults. For details, see +`Docutils Runtime Settings`_. See `Docutils Configuration`_ for +details about individual settings. + +.. _Docutils Runtime Settings: ./runtime-settings.html +.. _Docutils Configuration: ../user/config.html + + +Encodings +--------- + +The default output encoding of Docutils is UTF-8. +Docutils may introduce some non-ASCII text if you use +`auto-symbol footnotes`_ or the `"contents" directive`_. + +.. _auto-symbol footnotes: + ../ref/rst/restructuredtext.html#auto-symbol-footnotes +.. _"contents" directive: + ../ref/rst/directives.html#table-of-contents + + +``publish_parts()`` Details +=========================== + +The ``docutils.core.publish_parts()`` convenience function returns a +dictionary of document parts. Dictionary keys are the names of parts, +and values are Unicode strings. + +Each Writer component may publish a different set of document parts, +described below. Not all writers implement all parts. + + +Parts Provided By All Writers +----------------------------- + +_`encoding` + The output encoding setting. + +_`version` + The version of Docutils used. + +_`whole` + ``parts['whole']`` contains the entire formatted document. + + +Parts Provided By the HTML Writers +---------------------------------- + +HTML4 Writer +```````````` + +_`body` + ``parts['body']`` is equivalent to parts['fragment_']. 
It is + *not* equivalent to parts['html_body_']. + +_`body_prefix` + ``parts['body_prefix']`` contains:: + + + +
+ + and, if applicable:: + +
+ ... +
+ +_`body_pre_docinfo` + ``parts['body_pre_docinfo']`` contains (as applicable):: + +

...

+

...

+ +_`body_suffix` + ``parts['body_suffix']`` contains:: + +
+ + (the end-tag for ``
``), the footer division + if applicable:: + + + + and:: + + + + +_`docinfo` + ``parts['docinfo']`` contains the document bibliographic data, the + docinfo field list rendered as a table. + +_`footer` + ``parts['footer']`` contains the document footer content, meant to + appear at the bottom of a web page, or repeated at the bottom of + every printed page. + +_`fragment` + ``parts['fragment']`` contains the document body (*not* the HTML + ````). In other words, it contains the entire document, + less the document title, subtitle, docinfo, header, and footer. + +_`head` + ``parts['head']`` contains ```` tags and the document + ``...``. + +_`head_prefix` + ``parts['head_prefix']`` contains the XML declaration, the DOCTYPE + declaration, the ```` start tag and the ```` start + tag. + +_`header` + ``parts['header']`` contains the document header content, meant to + appear at the top of a web page, or repeated at the top of every + printed page. + +_`html_body` + ``parts['html_body']`` contains the HTML ```` content, less + the ```` and ```` tags themselves. + +_`html_head` + ``parts['html_head']`` contains the HTML ```` content, less + the stylesheet link and the ```` and ```` tags + themselves. Since ``publish_parts`` returns Unicode strings and + does not know about the output encoding, the "Content-Type" meta + tag's "charset" value is left unresolved, as "%s":: + + + + The interpolation should be done by client code. + +_`html_prolog` + ``parts['html_prolog']`` contains the XML declaration and the + doctype declaration. The XML declaration's "encoding" attribute's + value is left unresolved, as "%s":: + + + + The interpolation should be done by client code. + +_`html_subtitle` + ``parts['html_subtitle']`` contains the document subtitle, + including the enclosing ``

`` & ``

`` + tags. + +_`html_title` + ``parts['html_title']`` contains the document title, including the + enclosing ``

`` & ``

`` tags. + +_`meta` + ``parts['meta']`` contains all ```` tags. + +_`stylesheet` + ``parts['stylesheet']`` contains the embedded stylesheet or + stylesheet link. + +_`subtitle` + ``parts['subtitle']`` contains the document subtitle text and any + inline markup. It does not include the enclosing ``

`` & + ``

`` tags. + +_`title` + ``parts['title']`` contains the document title text and any inline + markup. It does not include the enclosing ``

`` & ``

`` + tags. + + +PEP/HTML Writer +``````````````` + +The PEP/HTML writer provides the same parts as the `HTML4 writer`_, +plus the following: + +_`pepnum` + ``parts['pepnum']`` contains the PEP number. + + +S5/HTML Writer +`````````````` + +The S5/HTML writer provides the same parts as the `HTML4 writer`_. + + +HTML5 Writer +```````````` + +The HTML5 writer provides the same parts as the `HTML4 writer`_. +However, it uses semantic HTML5 elements for the document, header and +footer. + + +Parts Provided by the LaTeX2e Writer +------------------------------------ + +See the template files for examples of how these parts can be combined +into a valid LaTeX document. + +abstract + ``parts['abstract']`` contains the formatted content of the + 'abstract' docinfo field. + +body + ``parts['body']`` contains the document's content. In other words, it + contains the entire document, except the document title, subtitle, and + docinfo. + + This part can be included into another LaTeX document body using the + ``\input{}`` command. + +body_pre_docinfo + ``parts['body_pre_docinfo']`` contains the ``\maketitle`` command. + +dedication + ``parts['dedication']`` contains the formatted content of the + 'dedication' docinfo field. + +docinfo + ``parts['docinfo']`` contains the document bibliographic data, the + docinfo field list rendered as a table. + + With ``--use-latex-docinfo`` 'author', 'organization', 'contact', + 'address' and 'date' info is moved to titledata. + + 'dedication' and 'abstract' are always moved to separate parts. + +fallbacks + ``parts['fallbacks']`` contains fallback definitions for + Docutils-specific commands and environments. + +head_prefix + ``parts['head_prefix']`` contains the declaration of + documentclass and document options. + +latex_preamble + ``parts['latex_preamble']`` contains the argument of the + ``--latex-preamble`` option. + +pdfsetup + ``parts['pdfsetup']`` contains the PDF properties + ("hyperref" package setup). 
+ +requirements + ``parts['requirements']`` contains required packages and setup + before the stylesheet inclusion. + +stylesheet + ``parts['stylesheet']`` contains the embedded stylesheet(s) or + stylesheet loading command(s). + +subtitle + ``parts['subtitle']`` contains the document subtitle text and any + inline markup. + +title + ``parts['title']`` contains the document title text and any inline + markup. + +titledata + ``parts['titledata']`` contains the combined title data in + ``\title``, ``\author``, and ``\date`` macros. + + With ``--use-latex-docinfo``, this includes the 'author', + 'organization', 'contact', 'address' and 'date' docinfo items. diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/runtime-settings.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/runtime-settings.txt new file mode 100644 index 00000000..7bfab71d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/runtime-settings.txt @@ -0,0 +1,218 @@ +=========================== + Docutils Runtime Settings +=========================== + +:Author: David Goodger, Günter Milde +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + + +Introduction +============ + +Docutils runtime settings are assembled from several sources: + +* Settings specifications of the selected components_, +* `configuration files`_ (if enabled), and +* command-line options (if enabled). + +Docutils overlays default and explicitly specified values from these +sources such that settings behave the way we want and expect them to +behave. + + +Settings priority +================= + +The sources are overlaid in the following order (later sources +overwrite earlier ones): + +1. Defaults specified in the `settings_spec`__ and + `settings_defaults`__ attributes for each component_. 
+ + __ SettingsSpec.settings_spec_ + __ SettingsSpec.settings_defaults_ + +2. Defaults specified in the `settings_default_overrides`__ attribute + for each component_. + + __ SettingsSpec.settings_default_overrides_ + +3. Settings specified in the `settings_overrides`__ parameter of the + `convenience functions`_ or the `settings_overrides` attribute of + a `Publisher`_ instance. + + __ `settings_overrides parameter`_ + +4. Settings specified in `active sections`_ of the `configuration files`_ + in the order described in `Configuration File Sections & Entries`_ + (if enabled). + +5. Command-line options (if enabled). + +For details see the ``docutils/__init__.py``, ``docutils/core.py``, and +``docutils/frontend.py`` modules and the implementation description in +`Runtime Settings Processing`_. + + +.. _SettingsSpec: + +SettingsSpec base class +======================= + +.. note:: + Implementation details will change with the move to replace the + deprecated optparse_ module with argparse_. + +The `docutils.SettingsSpec` base class is inherited by Docutils +components_ and `frontend.OptionParser`. +It defines the following six **attributes**: + +.. _SettingsSpec.settings_spec: + +`settings_spec` + a sequence of + + 1. option group title (string or None) + + 2. description (string or None) + + 3. option tuples with + + a) help text + b) options string(s) + c) dictionary with keyword arguments for `OptionParser.add_option()`_ + and an optional "validator", a `frontend.validate_*()` function + that processes the values (e.g. convert to other data types). + + For examples, see the source of ``frontend.OptionParser.settings_spec`` + or the `settings_spec` attributes of the Docutils components_. + + .. _SettingsSpec.settings_defaults: + +`settings_defaults` + for purely programmatic settings + (not accessible from command line and configuration files). + + .. 
_SettingsSpec.settings_default_overrides: + +`settings_default_overrides` + to override defaults for settings + defined in other components' `settings_spec`. + +`relative_path_settings` + listing settings containing filesystem paths. + + .. _active sections: + +`config_section` + the configuration file section specific to this + component. + +`config_section_dependencies` + lists configuration file sections + that should also be read (before the `config_section`). + +The last two attributes define which configuration file sections are +"active". See also `Configuration File Sections & Entries`_. + + +Glossary +======== + +.. _component: + +components +---------- + +Docutils front-ends and applications combine a selection of +*components* of the `Docutils Project Model`_. + +All components inherit the `SettingsSpec`_ base class. +This means that all instances of ``readers.Reader``, ``parsers.Parser``, and +``writers.Writer`` are also instances of ``docutils.SettingsSpec``. + +For the determination of runtime settings, ``frontend.OptionParser`` and +the `settings_spec parameter`_ in application settings specifications +are treated as components as well. + + +.. _convenience function: + +convenience functions +--------------------- + +Applications usually deploy Docutils by one of the +`Publisher convenience functions`_. + +All convenience functions accept the following optional parameters: + +.. _settings parameter: + +`settings` + a ``frontend.Values`` instance. + If present, it must be complete. + + No further runtime settings processing is done and the + following parameters have no effect. + + .. _settings_spec parameter: + +`settings_spec` + a `SettingsSpec`_ subclass or instance containing the settings + specification for the "Application" itself. + The instance is added to the components_ (after the generic + settings, parser, reader, and writer). + + .. 
_settings_overrides parameter: + +`settings_overrides` + a dictionary which is used to update the + defaults of the components' settings specifications. + + .. _config_section parameter: + +`config_section` + the name of an application-specific + `configuration file section`_ for this application. + + Can be specified instead of a `settings_spec` (a new SettingsSpec_ + instance that just defines a configuration section will be created) + or in addition to a `settings_spec` + (overriding its `config_section` attribute). + + +settings_spec +------------- + +The name ``settings_spec`` may refer to + +a) an instance of the SettingsSpec_ class, or +b) the data structure `SettingsSpec.settings_spec`_ which is used to + store settings details. + + +.. References: + +.. _Publisher: publisher.html +.. _Publisher convenience functions: + publisher.html#publisher-convenience-functions +.. _front-end tools: ../user/tools.html +.. _configuration files: +.. _Docutils Configuration: ../user/config.html#configuration-files +.. _configuration file section: +.. _Configuration File Sections & Entries: + ../user/config.html#configuration-file-sections-entries +.. _Docutils Project Model: ../peps/pep-0258.html#docutils-project-model +.. _Reader: ../peps/pep-0258.html#reader +.. _Runtime Settings Processing: ../dev/runtime-settings-processing.html + +.. _optparse: https://docs.python.org/dev/library/optparse.html +.. _argparse: https://docs.python.org/dev/library/argparse.html +.. 
_OptionParser.add_option(): + https://docs.python.org/dev/library/optparse.html + #optparse.OptionParser.add_option diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/transforms.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/transforms.txt new file mode 100644 index 00000000..9013a7b5 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/api/transforms.txt @@ -0,0 +1,207 @@ +===================== + Docutils Transforms +===================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + + +.. contents:: + +Transforms change the document tree in-place, add to the tree, or prune it. +Transforms resolve references and footnote numbers, process interpreted +text, and do other context-sensitive processing. Each transform is a +subclass of ``docutils.transforms.Transform``. + +There are `transforms added by components`_, others (e.g. +``parts.Contents``) are added by the parser, if a corresponding directive is +found in the document. + +To add a transform, components (objects inheriting from +Docutils.Component like Readers, Parsers, Writers, Input, Output) overwrite +the ``get_transforms()`` method of their base class. After the Reader has +finished processing, the Publisher calls +``Transformer.populate_from_components()`` with a list of components and all +transforms returned by the component's ``get_transforms()`` method are +stored in a `transformer object` attached to the document tree. + + +For more about transforms and the Transformer object, see also `PEP +258`_. (The ``default_transforms()`` attribute of component classes mentioned +there is deprecated. Use the ``get_transforms()`` method instead.) + +.. 
_PEP 258: ../peps/pep-0258.html#transformer + + +Transforms Listed in Priority Order +=================================== + +Transform classes each have a default_priority attribute which is used by +the Transformer to apply transforms in order (low to high). The default +priority can be overridden when adding transforms to the Transformer object. + + +============================== ============================ ======== +Transform: module.Class Added By Priority +============================== ============================ ======== +misc.class "class" (d/p) 210 + +references.Substitutions standalone (r), pep (r) 220 + +references.PropagateTargets standalone (r), pep (r) 260 + +frontmatter.DocTitle standalone (r) 320 + +frontmatter.DocInfo standalone (r) 340 + +frontmatter.SectSubTitle standalone (r) 350 + +peps.Headers pep (r) 360 + +peps.Contents pep (r) 380 + +universal.StripClasses... Writer (w) 420 + +references.AnonymousHyperlinks standalone (r), pep (r) 440 + +references.IndirectHyperlinks standalone (r), pep (r) 460 + +peps.TargetNotes pep (r) 520 + +references.TargetNotes peps.TargetNotes (t/p) 0 + +misc.CallBack peps.TargetNotes (t/p) 1 + +references.TargetNotes "target-notes" (d/p) 540 + +references.Footnotes standalone (r), pep (r) 620 + +references.ExternalTargets standalone (r), pep (r) 640 + +references.InternalTargets standalone (r), pep (r) 660 + +parts.SectNum "sectnum" (d/p) 710 + +parts.Contents "contents" (d/p), 720 + peps.Contents (t/p) + +universal.StripComments Reader (r) 740 + +peps.PEPZero peps.Headers (t/p) 760 + +components.Filter *not used* 780 + +universal.Decorations Reader (r) 820 + +misc.Transitions standalone (r), pep (r) 830 + +universal.ExposeInternals Reader (r) 840 + +references.DanglingReferences standalone (r), pep (r) 850 + +universal.SmartQuotes Parser 855 + +universal.Messages Writer (w) 860 + +universal.FilterMessages Writer (w) 870 + +universal.TestMessages DocutilsTestSupport 880 + +writer_aux.Compound *not used, to be 
removed* 910 + +writer_aux.Admonitions html4css1 (w), 920 + latex2e (w) + +misc.CallBack n/a 990 +============================== ============================ ======== + +Key: + +* (r): Reader +* (w): Writer +* (d): Directive +* (t): Transform +* (/p): Via a "pending" node + + +Transform Priority Range Categories +=================================== + +==== ==== ================================================ + Priority +---------- ------------------------------------------------ +From To Category +==== ==== ================================================ + 0 99 immediate execution (added by another transform) + 100 199 very early (non-standard) + 200 299 very early + 300 399 early + 400 699 main + 700 799 late + 800 899 very late + 900 999 very late (non-standard) +==== ==== ================================================ + + +Transforms added by components +=============================== + + +readers.Reader: + | universal.Decorations, + | universal.ExposeInternals, + | universal.StripComments + +readers.ReReader: + None + +readers.standalone.Reader: + | references.Substitutions, + | references.PropagateTargets, + | frontmatter.DocTitle, + | frontmatter.SectionSubTitle, + | frontmatter.DocInfo, + | references.AnonymousHyperlinks, + | references.IndirectHyperlinks, + | references.Footnotes, + | references.ExternalTargets, + | references.InternalTargets, + | references.DanglingReferences, + | misc.Transitions + +readers.pep.Reader: + | references.Substitutions, + | references.PropagateTargets, + | references.AnonymousHyperlinks, + | references.IndirectHyperlinks, + | references.Footnotes, + | references.ExternalTargets, + | references.InternalTargets, + | references.DanglingReferences, + | misc.Transitions, + | peps.Headers, + | peps.Contents, + | peps.TargetNotes + +parsers.rst.Parser + universal.SmartQuotes + +writers.Writer: + | universal.Messages, + | universal.FilterMessages, + | universal.StripClassesAndElements + +writers.UnfilteredWriter + None + 
+writers.latex2e.Writer + writer_aux.Admonitions + +writers.html4css1.Writer: + writer_aux.Admonitions + +writers.odf_odt.Writer: + removes references.DanglingReferences diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/distributing.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/distributing.txt new file mode 100644 index 00000000..6fa30f4b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/distributing.txt @@ -0,0 +1,149 @@ +=============================== + Docutils_ Distributor's Guide +=============================== + +:Author: Lea Wiemann +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + +.. contents:: + +This document describes how to create packages of Docutils (e.g. for +shipping with a Linux distribution). If you have any questions, +please direct them to the Docutils-develop_ mailing list. + +First, please download the most current `release tarball`_ and unpack +it. + +.. _Docutils-develop: ../user/mailing-lists.html#docutils-develop +.. _release tarball: https://docutils.sourceforge.io/#download + + +Dependencies +============ + +Docutils has the following dependencies: + +* Python 3.7 or later is required. + Use ">= Python 3.7" in the dependencies. + +* Docutils may optionally make use of the PIL (`Python Imaging + Library`_ or Pillow_). If PIL is present, it is automatically + detected by Docutils. + +* Docutils recommends the `Pygments`_ syntax highlighter. If available, it + is used for highlighting the content of `code directives`_ and roles as + well as included source code files (with the "code" option to the include_ + directive). + +* Docutils can use the `recommonmark`_ parser to parse input in + the Markdown format (new in 0.17). + +.. 
_Python Imaging Library: + https://en.wikipedia.org/wiki/Python_Imaging_Library +.. _Pillow: https://pypi.org/project/Pillow/ +.. _Pygments: https://pygments.org/ +.. _recommonmark: https://pypi.org/project/recommonmark/ + +.. _code directives: ../ref/rst/directives.html#code +.. _include: ../ref/rst/directives.html#include + + +Python Files +============ + +The Docutils Python files must be installed into the +``site-packages/`` directory of Python. Running ``python setup.py +install`` should do the trick, but if you want to place the files +yourself, you can just install the ``docutils/`` directory of the +Docutils tarball to ``/usr/lib/python/site-packages/docutils/``. In +this case you should also compile the Python files to ``.pyc`` and/or +``.pyo`` files so that Docutils doesn't need to be recompiled every +time it's executed. + + +Executables +=========== + +The executable front-end tools are located in the ``tools/`` directory +of the Docutils tarball. + +The ``rst2*.py`` tools are intended for end-users. You should install them +to ``/usr/bin/``. You do not need to change the names (e.g. to +``docutils-rst2html.py``) because the ``rst2`` prefix is unique. + + +Documentation +============= + +The documentation should be generated using ``buildhtml.py``. To +generate HTML for all documentation files, go to the ``tools/`` +directory and run:: + + # Place html4css1.css in base directory. + cp ../docutils/writers/html4css1/html4css1.css .. + ./buildhtml.py --stylesheet-path=../html4css1.css .. + +Then install the following files to ``/usr/share/doc/docutils/`` (or +wherever you install documentation): + +* All ``.html`` and ``.txt`` files in the base directory. + +* The ``docs/`` directory. + + Do not install the contents of the ``docs/`` directory directly to + ``/usr/share/doc/docutils/``; it's incomplete and would contain + invalid references! + +* The ``licenses/`` directory. + +* ``html4css1.css`` in the base directory. 
+ + +Removing the ``.txt`` Files +--------------------------- + +If you are tight with disk space, you can remove all ``.txt`` files in +the tree except for: + +* those in the ``licenses/`` directory because they have not been + processed to HTML and + +* ``user/rst/cheatsheet.txt`` and ``user/rst/demo.txt``, which should + be readable in source form. + +Before you remove the ``.txt`` files you should rerun ``buildhtml.py`` +with the ``--no-source-link`` switch to avoid broken references to the +source files. + + +Other Files +=========== + +You may want to install the Emacs-Lisp files +``tools/editors/emacs/*.el`` into the appropriate directory. + + +Configuration File +================== + +It is possible to have a system-wide configuration file at +``/etc/docutils.conf``. However, this is usually not necessary. You +should *not* install ``tools/docutils.conf`` into ``/etc/``. + + +Tests +===== + +While you probably do not need to ship the tests with your +distribution, you can test your package by installing it and then +running ``alltests.py`` from the ``tests/`` directory of the Docutils +tarball. + +For more information on testing, view the `Docutils Testing`_ page. + +.. _Docutils Testing: https://docutils.sourceforge.io/docs/dev/testing.html diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-plan.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-plan.txt new file mode 100644 index 00000000..71310c1e --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-plan.txt @@ -0,0 +1,480 @@ +=========================================== + Plan for Enthought API Documentation Tool +=========================================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: 2004 by `Enthought, Inc. `_ +:License: `Enthought License`_ (BSD-style) + +.. 
_Enthought License: https://docutils.sourceforge.io/licenses/enthought.txt + +This document should be read in conjunction with the `Enthought API +Documentation Tool RFP`__ prepared by Janet Swisher. + +__ enthought-rfp.html + +.. contents:: +.. sectnum:: + + +Introduction +============ + +In March 2004 I met Eric Jones, president and CTO of `Enthought, +Inc.`_, at `PyCon 2004`_ in Washington DC. He told me that Enthought +was using reStructuredText_ for source code documentation, but they +had some issues. He asked if I'd be interested in doing some work on +a customized API documentation tool. Shortly after PyCon, Janet +Swisher, Enthought's senior technical writer, contacted me to work out +details. Some email, a trip to Austin in May, and plenty of Texas +hospitality later, we had a project. This document will record the +details, milestones, and evolution of the project. + +In a nutshell, Enthought is sponsoring the implementation of an open +source API documentation tool that meets their needs. Fortuitously, +their needs coincide well with the "Python Source Reader" description +in `PEP 258`_. In other words, Enthought is funding some significant +improvements to Docutils, improvements that were planned but never +implemented due to time and other constraints. The implementation +will take place gradually over several months, on a part-time basis. + +This is an ideal example of cooperation between a corporation and an +open-source project. The corporation, the project, I personally, and +the community all benefit. Enthought, whose commitment to open source +is also evidenced by their sponsorship of SciPy_, benefits by +obtaining a useful piece of software, much more quickly than would +have been possible without their support. Docutils benefits directly +from the implementation of one of its core subsystems. I benefit from +the funding, which allows me to justify the long hours to my wife and +family. 
All the corporations, projects, and individuals that make up +the community will benefit from the end result, which will be great. + +All that's left now is to actually do the work! + +.. _PyCon 2004: http://pycon.org/dc2004/ +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _SciPy: http://www.scipy.org/ + + +Development Plan +================ + +1. Analyze prior art, most notably Epydoc_ and HappyDoc_, to see how + they do what they do. I have no desire to reinvent wheels + unnecessarily. I want to take the best ideas from each tool, + combined with the outline in `PEP 258`_ (which will evolve), and + build at least the foundation of the definitive Python + auto-documentation tool. + + .. _Epydoc: http://epydoc.sourceforge.net/ + .. _HappyDoc: http://happydoc.sourceforge.net/ + .. _PEP 258: + https://docutils.sourceforge.io/docs/peps/pep-0258.html#python-source-reader + +2. Decide on a base platform. The best way to achieve Enthought's + goals in a reasonable time frame may be to extend Epydoc or + HappyDoc. Or it may be necessary to start fresh. + +3. Extend the reStructuredText parser. See `Proposed Changes to + reStructuredText`_ below. + +4. Depending on the base platform chosen, build or extend the + docstring & doc comment extraction tool. This may be the biggest + part of the project, but I won't be able to break it down into + details until more is known. + + +Repository +========== + +If possible, all software and documentation files will be stored in +the Subversion repository of Docutils and/or the base project, which +are all publicly-available via anonymous pserver access. + +The Docutils project is very open about granting Subversion write +access; so far, everyone who asked has been given access. Any +Enthought staff member who would like Subversion write access will get +it. 
+ +If either Epydoc or HappyDoc is chosen as the base platform, I will +ask the project's administrator for CVS access for myself and any +Enthought staff member who wants it. If sufficient access is not +granted -- although I doubt that there would be any problem -- we may +have to begin a fork, which could be hosted on SourceForge, on +Enthought's Subversion server, or anywhere else deemed appropriate. + + +Copyright & License +=================== + +Most existing Docutils files have been placed in the public domain, as +follows:: + + :Copyright: This document has been placed in the public domain. + +This is in conjunction with the "Public Domain Dedication" section of +COPYING.txt__. + +__ https://docutils.sourceforge.io/COPYING.html + +The code and documentation originating from Enthought funding will +have Enthought's copyright and license declaration. While I will try +to keep Enthought-specific code and documentation separate from the +existing files, there will inevitably be cases where it makes the most +sense to extend existing files. + +I propose the following: + +1. New files related to this Enthought-funded work will be identified + with the following field-list headers:: + + :Copyright: 2004 by Enthought, Inc. + :License: Enthought License (BSD Style) + + The license field text will be linked to the license file itself. + +2. For significant or major changes to an existing file (more than 10% + change), the headers shall change as follows (for example):: + + :Copyright: 2001-2004 by David Goodger + :Copyright: 2004 by Enthought, Inc. + :License: BSD-style + + If the Enthought-funded portion becomes greater than the previously + existing portion, Enthought's copyright line will be shown first. + +3. In cases of insignificant or minor changes to an existing file + (less than 10% change), the public domain status shall remain + unchanged. + +A section describing all of this will be added to the Docutils +`COPYING`__ instructions file. 
+ +If another project is chosen as the base project, similar changes +would be made to their files, subject to negotiation. + +__ https://docutils.sourceforge.io/COPYING.html + + +Proposed Changes to reStructuredText +==================================== + +Doc Comment Syntax +------------------ + +The "traits" construct is implemented as dictionaries, where +standalone strings would be Python syntax errors. Therefore traits +require documentation in comments. We also need a way to +differentiate between ordinary "internal" comments and documentation +comments (doc comments). + +Javadoc uses the following syntax for doc comments:: + + /** + * The first line of a multi-line doc comment begins with a slash + * and *two* asterisks. The doc comment ends normally. + */ + +Python doesn't have multi-line comments; only single-line. A similar +convention in Python might look like this:: + + ## + # The first line of a doc comment begins with *two* hash marks. + # The doc comment ends with the first non-comment line. + 'data' : AnyValue, + + ## The double-hash-marks could occur on the first line of text, + # saving a line in the source. + 'data' : AnyValue, + +How to indicate the end of the doc comment? :: + + ## + # The first line of a doc comment begins with *two* hash marks. + # The doc comment ends with the first non-comment line, or another + # double-hash-mark. + ## + # This is an ordinary, internal, non-doc comment. + 'data' : AnyValue, + + ## First line of a doc comment, terse syntax. + # Second (and last) line. Ends here: ## + # This is an ordinary, internal, non-doc comment. + 'data' : AnyValue, + +Or do we even need to worry about this case? A simple blank line +could be used:: + + ## First line of a doc comment, terse syntax. + # Second (and last) line. Ends with a blank line. + + # This is an ordinary, internal, non-doc comment. 
+ 'data' : AnyValue, + +Other possibilities:: + + #" Instead of double-hash-marks, we could use a hash mark and a + # quotation mark to begin the doc comment. + 'data' : AnyValue, + + ## We could require double-hash-marks on every line. This has the + ## added benefit of delimiting the *end* of the doc comment, as + ## well as working well with line wrapping in Emacs + ## ("fill-paragraph" command). + # Ordinary non-doc comment. + 'data' : AnyValue, + + #" A hash mark and a quotation mark on each line looks funny, and + #" it doesn't work well with line wrapping in Emacs. + 'data' : AnyValue, + +These styles (repeated on each line) work well with line wrapping in +Emacs:: + + ## #> #| #- #% #! #* + +These styles do *not* work well with line wrapping in Emacs:: + + #" #' #: #) #. #/ #@ #$ #^ #= #+ #_ #~ + +The style of doc comment indicator used could be a runtime, global +and/or per-module setting. That may add more complexity than it's +worth though. + + +Recommendation +`````````````` + +I recommend adopting "#*" on every line:: + + # This is an ordinary non-doc comment. + + #* This is a documentation comment, with an asterisk after the + #* hash marks on every line. + 'data' : AnyValue, + +I initially recommended adopting double-hash-marks:: + + # This is an ordinary non-doc comment. + + ## This is a documentation comment, with double-hash-marks on + ## every line. + 'data' : AnyValue, + +But Janet Swisher rightly pointed out that this could collide with +ordinary comments that are then block-commented. This applies to +double-hash-marks on the first line only as well. So they're out. + +On the other hand, the JavaDoc-comment style ("##" on the first line +only, "#" after that) is used in Fredrik Lundh's PythonDoc_. It may +be worthwhile to conform to this syntax, reinforcing it as a standard. +PythonDoc does not support terse doc comments (text after "##" on the +first line). + +.. 
_PythonDoc: http://effbot.org/zone/pythondoc.htm + + +Update +`````` + +Enthought's Traits system has switched to a metaclass base, and traits +are now defined via ordinary attributes. Therefore doc comments are +no longer absolutely necessary; attribute docstrings will suffice. +Doc comments may still be desirable though, since they allow +documentation to precede the thing being documented. + + +Docstring Density & Whitespace Minimization +------------------------------------------- + +One problem with extensively documented classes & functions, is that +there is a lot of screen space wasted on whitespace. Here's some +current Enthought code (from lib/cp/fluids/gassmann.py):: + + def max_gas(temperature, pressure, api, specific_gravity=.56): + """ + Computes the maximum dissolved gas in oil using Batzle and + Wang (1992). + + Parameters + ---------- + temperature : sequence + Temperature in degrees Celsius + pressure : sequence + Pressure in MPa + api : sequence + Stock tank oil API + specific_gravity : sequence + Specific gravity of gas at STP, default is .56 + + Returns + ------- + max_gor : sequence + Maximum dissolved gas in liters/liter + + Description + ----------- + This estimate is based on equations given by Mavko, Mukerji, + and Dvorkin, (1998, pp. 218-219, or 2003, p. 236) obtained + originally from Batzle and Wang (1992). + """ + code... + +The docstring is 24 lines long. + +Rather than using subsections, field lists (which exist now) can save +6 lines:: + + def max_gas(temperature, pressure, api, specific_gravity=.56): + """ + Computes the maximum dissolved gas in oil using Batzle and + Wang (1992). 
+ + :Parameters: + temperature : sequence + Temperature in degrees Celsius + pressure : sequence + Pressure in MPa + api : sequence + Stock tank oil API + specific_gravity : sequence + Specific gravity of gas at STP, default is .56 + :Returns: + max_gor : sequence + Maximum dissolved gas in liters/liter + :Description: This estimate is based on equations given by + Mavko, Mukerji, and Dvorkin, (1998, pp. 218-219, or 2003, + p. 236) obtained originally from Batzle and Wang (1992). + """ + code... + +As with the "Description" field above, field bodies may begin on the +same line as the field name, which also saves space. + +The output for field lists is typically a table structure. For +example: + + :Parameters: + temperature : sequence + Temperature in degrees Celsius + pressure : sequence + Pressure in MPa + api : sequence + Stock tank oil API + specific_gravity : sequence + Specific gravity of gas at STP, default is .56 + :Returns: + max_gor : sequence + Maximum dissolved gas in liters/liter + :Description: + This estimate is based on equations given by Mavko, + Mukerji, and Dvorkin, (1998, pp. 218-219, or 2003, p. 236) + obtained originally from Batzle and Wang (1992). + +But the definition lists describing the parameters and return values +are still wasteful of space. There are a lot of half-filled lines. + +Definition lists are currently defined as:: + + term : classifier + definition + +Where the classifier part is optional. Ideas for improvements: + +1. We could allow multiple classifiers:: + + term : classifier one : two : three ... + definition + +2. We could allow the definition on the same line as the term, using + some embedded/inline markup: + + * "--" could be used, but only in limited and well-known contexts:: + + term -- definition + + This is the syntax used by StructuredText (one of + reStructuredText's predecessors). It was not adopted for + reStructuredText because it is ambiguous -- people often use "--" + in their text, as I just did. 
But given a constrained context, + the ambiguity would be acceptable (or would it?). That context + would be: in docstrings, within a field list, perhaps only with + certain well-defined field names (parameters, returns). + + * The "constrained context" above isn't really enough to make the + ambiguity acceptable. Instead, a slightly more verbose but far + less ambiguous syntax is possible:: + + term === definition + + This syntax has advantages. Equals signs lend themselves to the + connotation of "definition". And whereas one or two equals signs + are commonly used in program code, three equals signs in a row + have no conflicting meanings that I know of. (Update: there + *are* uses out there.) + + The problem with this approach is that using inline markup for + structure is inherently ambiguous in reStructuredText. For + example, writing *about* definition lists would be difficult:: + + ``term === definition`` is an example of a compact definition list item + + The parser checks for structural markup before it does inline + markup processing. But the "===" should be protected by its inline + literal context. + +3. We could allow the definition on the same line as the term, using + structural markup. A variation on bullet lists would work well:: + + : term :: definition + : another term :: and a definition that + wraps across lines + + Some ambiguity remains:: + + : term ``containing :: double colons`` :: definition + + But the likelihood of such cases is negligible, and they can be + covered in the documentation. + + Other possibilities for the definition delimiter include:: + + : term : classifier -- definition + : term : classifier --- definition + : term : classifier : : definition + : term : classifier === definition + +The third idea currently has the best chance of being adopted and +implemented. 
+ + +Recommendation +`````````````` + +Combining these ideas, the function definition becomes:: + + def max_gas(temperature, pressure, api, specific_gravity=.56): + """ + Computes the maximum dissolved gas in oil using Batzle and + Wang (1992). + + :Parameters: + : temperature : sequence :: Temperature in degrees Celsius + : pressure : sequence :: Pressure in MPa + : api : sequence :: Stock tank oil API + : specific_gravity : sequence :: Specific gravity of gas at + STP, default is .56 + :Returns: + : max_gor : sequence :: Maximum dissolved gas in liters/liter + :Description: This estimate is based on equations given by + Mavko, Mukerji, and Dvorkin, (1998, pp. 218-219, or 2003, + p. 236) obtained originally from Batzle and Wang (1992). + """ + code... + +The docstring is reduced to 14 lines, from the original 24. For +longer docstrings with many parameters and return values, the +difference would be more significant. diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-rfp.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-rfp.txt new file mode 100644 index 00000000..5fb72b39 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/enthought-rfp.txt @@ -0,0 +1,146 @@ +================================== + Enthought API Documentation Tool +================================== +----------------------- + Request for Proposals +----------------------- + +:Author: Janet Swisher, Senior Technical Writer +:Organization: `Enthought, Inc. `_ +:Copyright: 2004 by Enthought, Inc. +:License: `Enthought License`_ (BSD Style) + +.. _Enthought License: https://docutils.sourceforge.io/licenses/enthought.txt + +The following is excerpted from the full RFP, and is published here +with permission from `Enthought, Inc.`_ See the `Plan for Enthought +API Documentation Tool`__. + +__ enthought-plan.html + +.. contents:: +.. 
sectnum:: + + +Requirements +============ + +The documentation tool will address the following high-level goals: + + +Documentation Extraction +------------------------ + +1. Documentation will be generated directly from Python source code, + drawing from the code structure, docstrings, and possibly other + comments. + +2. The tool will extract logical constructs as appropriate, minimizing + the need for comments that are redundant with the code structure. + The output should reflect both documented and undocumented + elements. + + +Source Format +------------- + +1. The docstrings will be formatted in as terse a syntax as possible. + Required tags, syntax, and white space should be minimized. + +2. The tool must support the use of Traits. Special comment syntax + for Traits may be necessary. Information about the Traits package + is available at http://code.enthought.com/traits/. In the + following example, each trait definition is prefaced by a plain + comment:: + + __traits__ = { + + # The current selection within the frame. + 'selection' : Trait([], TraitInstance(list)), + + # The frame has been activated or deactivated. + 'activated' : TraitEvent(), + + 'closing' : TraitEvent(), + + # The frame is closed. + 'closed' : TraitEvent(), + } + +3. Support for ReStructuredText (ReST) format is desirable, because + much of the existing docstrings uses ReST. However, the complete + ReST specification need not be supported, if a subset can achieve + the project goals. If the tool does not support ReST, the + contractor should also provide a tool or path to convert existing + docstrings. + + +Output Format +------------- + +1. Documentation will be output as a navigable suite of HTML + files. + +2. The style of the HTML files will be customizable by a cascading + style sheet and/or a customizable template. + +3. Page elements such as headers and footers should be customizable, to + support differing requirements from one documentation project to + the next.
+ + +Output Structure and Navigation +------------------------------- + +1. The navigation scheme for the HTML files should not rely on frames, + and should harmonize with conversion to Microsoft HTML Help (.chm) + format. + +2. The output should be structured to make navigable the architecture + of the Python code. Packages, modules, classes, traits, and + functions should be presented in clear, logical hierarchies. + Diagrams or trees for inheritance, collaboration, sub-packaging, + etc. are desirable but not required. + +3. The output must include indexes that provide a comprehensive view + of all packages, modules, and classes. These indexes will provide + readers with a clear and exhaustive view of the code base. These + indexes should be presented in a way that is easily accessible and + allows easy navigation. + +4. Cross-references to other documented elements will be used + throughout the documentation, to enable the reader to move quickly + to relevant information. For example, where type information for an + element is available, the type definition should be + cross-referenced. + +5. The HTML suite should provide consistent navigation back to the + home page, which will include the following information: + + * Bibliographic information + + - Author + - Copyright + - Release date + - Version number + + * Abstract + + * References + + - Links to related internal docs (i.e., other docs for the same + product) + + - Links to related external docs (e.g., supporting development + docs, Python support docs, docs for included packages) + + It should be possible to specify similar information at the top + level of each package, so that packages can be included as + appropriate for a given application. + + +License +======= + +Enthought intends to release the software under an open-source +("BSD-style") license.
diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/hacking.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/hacking.txt new file mode 100644 index 00000000..17f7d710 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/hacking.txt @@ -0,0 +1,264 @@ +========================== + Docutils_ Hacker's Guide +========================== + +:Author: Lea Wiemann +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +:Abstract: This is the introduction to Docutils for all persons who + want to extend Docutils in some way. +:Prerequisites: You have used reStructuredText_ and played around with + the `Docutils front-end tools`_ before. Some (basic) Python + knowledge is certainly helpful (though not necessary, strictly + speaking). + +.. _Docutils: https://docutils.sourceforge.io/ +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _Docutils front-end tools: ../user/tools.html + +.. contents:: + + +Overview of the Docutils Architecture +===================================== + +To give you an understanding of the Docutils architecture, we'll dive +right into the internals using a practical example. + +Consider the following reStructuredText file:: + + My *favorite* language is Python_. + + .. _Python: https://www.python.org/ + +Using the ``rst2html.py`` front-end tool, you would get an HTML output +which looks like this:: + + [uninteresting HTML code removed] + +
+

My favorite language is Python.

+
+ + + +While this looks very simple, it's enough to illustrate all internal +processing stages of Docutils. Let's see how this document is +processed from the reStructuredText source to the final HTML output: + + +Reading the Document +-------------------- + +The **Reader** reads the document from the source file and passes it +to the parser (see below). The default reader is the standalone +reader (``docutils/readers/standalone.py``) which just reads the input +data from a single text file. Unless you want to do really fancy +things, there is no need to change that. + +Since you probably won't need to touch readers, we will just move on +to the next stage: + + +Parsing the Document +-------------------- + +The **Parser** analyzes the input document and creates a **node +tree** representation. In this case we are using the +**reStructuredText parser** (``docutils/parsers/rst/__init__.py``). +To see what that node tree looks like, we call ``quicktest.py`` (which +can be found in the ``tools/`` directory of the Docutils distribution) +with our example file (``test.txt``) as first parameter (Windows users +might need to type ``python quicktest.py test.txt``):: + + $ quicktest.py test.txt + + + My + + favorite + language is + + Python + . + + +Let us now examine the node tree: + +The top-level node is ``document``. It has a ``source`` attribute +whose value is ``test.txt``. There are two children: A ``paragraph`` +node and a ``target`` node. The ``paragraph`` in turn has children: A +text node ("My "), an ``emphasis`` node, a text node (" language is "), +a ``reference`` node, and again a ``Text`` node ("."). + +These node types (``document``, ``paragraph``, ``emphasis``, etc.) are +all defined in ``docutils/nodes.py``. The node types are internally +arranged as a class hierarchy (for example, both ``emphasis`` and +``reference`` have the common superclass ``Inline``).
To get an +overview of the node class hierarchy, use epydoc (type ``epydoc +nodes.py``) and look at the class hierarchy tree. + + +Transforming the Document +------------------------- + +In the node tree above, the ``reference`` node does not contain the +target URI (``https://www.python.org/``) yet. + +Assigning the target URI (from the ``target`` node) to the +``reference`` node is *not* done by the parser (the parser only +translates the input document into a node tree). + +Instead, it's done by a **Transform**. In this case (resolving a +reference), it's done by the ``ExternalTargets`` transform in +``docutils/transforms/references.py``. + +In fact, there are quite a lot of Transforms, which do various useful +things like creating the table of contents, applying substitution +references or resolving auto-numbered footnotes. + +The Transforms are applied after parsing. To see how the node tree +has changed after applying the Transforms, we use the +``rst2pseudoxml.py`` tool: + +.. parsed-literal:: + + $ rst2pseudoxml.py test.txt + + + My + + favorite + language is + + Python + . + + +For our small test document, the only change is that the ``refname`` +attribute of the reference has been replaced by a ``refuri`` +attribute |---| the reference has been resolved. + +While this does not look very exciting, transforms are a powerful tool +to apply any kind of transformation on the node tree. + +By the way, you can also get a "real" XML representation of the node +tree by using ``rst2xml.py`` instead of ``rst2pseudoxml.py``. + + +Writing the Document +-------------------- + +To get an HTML document out of the node tree, we use a **Writer**, the +HTML writer in this case (``docutils/writers/html4css1.py``). + +The writer receives the node tree and returns the output document. +For HTML output, we can test this using the ``rst2html.py`` tool:: + + $ rst2html.py --link-stylesheet test.txt + + + + + + + + + + +
+

My favorite language is Python.

+
+ + + +So here we finally have our HTML output. The actual document contents +are in the fourth-last line. Note, by the way, that the HTML writer +did not render the (invisible) ``target`` node |---| only the +``paragraph`` node and its children appear in the HTML output. + + +Extending Docutils +================== + +Now you'll ask, "how do I actually extend Docutils?" + +First of all, once you are clear about *what* you want to achieve, you +have to decide *where* to implement it |---| in the Parser (e.g. by +adding a directive or role to the reStructuredText parser), as a +Transform, or in the Writer. There is often one obvious choice among +those three (Parser, Transform, Writer). If you are unsure, ask on +the Docutils-develop_ mailing list. + +In order to find out how to start, it is often helpful to look at +similar features which are already implemented. For example, if you +want to add a new directive to the reStructuredText parser, look at +the implementation of a similar directive in +``docutils/parsers/rst/directives/``. + + +Modifying the Document Tree Before It Is Written +------------------------------------------------ + +You can modify the document tree right before the writer is called. +One possibility is to use the publish_doctree_ and +publish_from_doctree_ functions. + +To retrieve the document tree, call:: + + document = docutils.core.publish_doctree(...) + +Please see the docstring of publish_doctree for a list of parameters. + +.. XXX Need to write a well-readable list of (commonly used) options + of the publish_* functions. Probably in api/publisher.txt. + +``document`` is the root node of the document tree. You can now +change the document by accessing the ``document`` node and its +children |---| see `The Node Interface`_ below. + +When you're done with modifying the document tree, you can write it +out by calling:: + + output = docutils.core.publish_from_doctree(document, ...) + +.. 
_publish_doctree: ../api/publisher.html#publish_doctree +.. _publish_from_doctree: ../api/publisher.html#publish_from_doctree + + +The Node Interface +------------------ + +As described in the overview above, Docutils' internal representation +of a document is a tree of nodes. We'll now have a look at the +interface of these nodes. + +(To be completed.) + + +What Now? +========= + +This document is not complete. Many topics could (and should) be +covered here. To find out which topics we should write about +first, we are awaiting *your* feedback. So please ask your questions +on the Docutils-develop_ mailing list. + + +.. _Docutils-develop: ../user/mailing-lists.html#docutils-develop + + +.. |---| unicode:: 8212 .. em-dash + :trim: + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/policies.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/policies.txt new file mode 100644 index 00000000..76fd8b31 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/policies.txt @@ -0,0 +1,686 @@ +=========================== + Docutils Project Policies +=========================== + +:Author: David Goodger; open to all Docutils developers +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + +The Docutils project group is a meritocracy based on code contribution +and lots of discussion [#bcs]_. A few quotes sum up the policies of +the Docutils project. The IETF's classic credo (by MIT professor Dave +Clark) is an ideal we can aspire to: + + We reject: kings, presidents, and voting. We believe in: rough + consensus and running code. + +As architect, chief cook and bottle-washer, David Goodger currently +functions as BDFN (Benevolent Dictator For Now).
(But he would +happily abdicate the throne given a suitable candidate. Any takers?) + +Eric S. Raymond, anthropologist of the hacker subculture, writes in +his essay `The Magic Cauldron`_: + + The number of contributors [to] projects is strongly and inversely + correlated with the number of hoops each project makes a user go + through to contribute. + +We will endeavour to keep the barrier to entry as low as possible. +The policies below should not be thought of as barriers, but merely as +a codification of experience to date. These are "best practices"; +guidelines, not absolutes. Exceptions are expected, tolerated, and +used as a source of improvement. Feedback and criticism is welcome. + +As for control issues, Emmett Plant (CEO of the Xiph.org Foundation, +originators of Ogg Vorbis) put it well when he said: + + Open source dictates that you lose a certain amount of control + over your codebase, and that's okay with us. + +.. [#bcs] Phrase borrowed from `Ben Collins-Sussman of the Subversion + project `__. + +.. _The Magic Cauldron: + http://www.catb.org/~esr/writings/magic-cauldron/ + + +Python Coding Conventions +========================= + +Contributed code will not be refused merely because it does not +strictly adhere to these conditions; as long as it's internally +consistent, clean, and correct, it probably will be accepted. But +don't be surprised if the "offending" code gets fiddled over time to +conform to these conventions. + +The Docutils project shall follow the generic coding conventions as +specified in the `Style Guide for Python Code`_ and `Docstring +Conventions`_ PEPs, summarized, clarified, and extended as follows: + +* 4 spaces per indentation level. No hard tabs. + +* Use only 7-bit ASCII, no 8-bit strings. See `Docutils + Internationalization`_. + +* No one-liner compound statements (i.e., no ``if x: return``: use two + lines & indentation), except for degenerate class or method + definitions (i.e., ``class X: pass`` is OK.). 
+ +* Lines should be no more than 78 characters long. + +* Use "StudlyCaps" for class names (except for element classes in + docutils.nodes). + +* Use "lowercase" or "lowercase_with_underscores" for function, + method, and variable names. For short names, maximum two words, + joined lowercase may be used (e.g. "tagname"). For long names with + three or more words, or where it's hard to parse the split between + two words, use lowercase_with_underscores (e.g., + "note_explicit_target", "explicit_target"). If in doubt, use + underscores. + +* Avoid lambda expressions, which are inherently difficult to + understand. Named functions are preferable and superior: they're + faster (no run-time compilation), and well-chosen names serve to + document and aid understanding. + +* Avoid functional constructs (filter, map, etc.). Use list + comprehensions instead. + +* Avoid ``from __future__ import`` constructs. They are inappropriate + for production code. + +* Use 'single quotes' for string literals, and """triple double + quotes""" for docstrings. + +.. _Style Guide for Python Code: + https://peps.python.org/pep-0008 +.. _Docstring Conventions: https://peps.python.org/pep-0257 +.. _Docutils Internationalization: ../howto/i18n.html#python-code + + +Documentation Conventions +========================= + +* Docutils documentation is written using reStructuredText, of course. + +* Use 7-bit ASCII if at all possible, and Unicode substitutions when + necessary. + +* Use the following section title adornment styles:: + + ================ + Document Title + ================ + + -------------------------------------------- + Document Subtitle, or Major Division Title + -------------------------------------------- + + Section + ======= + + Subsection + ---------- + + Sub-Subsection + `````````````` + + Sub-Sub-Subsection + .................. + +* Use two blank lines before each section/subsection/etc. title. One + blank line is sufficient between immediately adjacent titles. 
+ +* Add a bibliographic field list immediately after the document + title/subtitle. See the beginning of this document for an example. + +* Add an Emacs "local variables" block in a comment at the end of the + document. See the end of this document for an example. + + +Copyrights and Licensing +======================== + +The majority of the Docutils project code and documentation has been +placed in the public domain (see `Copying Docutils`_). + +Unless clearly and explicitly indicated +otherwise, any patches (modifications to existing files) submitted to +the project for inclusion (via Subversion, SourceForge trackers, +mailing lists, or private email) are assumed to be in the public +domain as well. + +Any new files contributed to the project should clearly state their +intentions regarding copyright, in one of the following ways: + +* Public domain (preferred): include the statement "This + module/document has been placed in the public domain." + +* Copyright & open source license: include a copyright notice, along + with either an embedded license statement, a reference to an + accompanying license file, or a license URL. + + The license should be well known, simple and compatible with other + open source software licenses. To keep the number of different + licenses at a minimum, using the `2-Clause BSD license`_ + (`local copy`__) is recommended. + + .. Rationale: + + clear wording, structured text + + license used by the closely related Sphinx project + +.. _Copying Docutils: ../../COPYING.html +.. _2-Clause BSD license: http://opensource.org/licenses/BSD-2-Clause +__ ../../licenses/BSD-2-Clause.txt + + +.. _Subversion Repository: + +Repository +========== + +Please see the `repository documentation`_ for details on how to +access Docutils' Subversion repository. Anyone can access the +repository anonymously. Only project developers can make changes. +(If you would like to become a project developer, just ask!) 
Also see +`Setting Up For Docutils Development`_ below for some useful info. + +Unless you really *really* know what you're doing, please do *not* use +``svn import``. It's quite easy to mess up the repository with an +import. + +.. _repository documentation: repository.html + + +Branches +-------- + +(These branch policies go into effect with Docutils 0.4.) + +The "docutils" directory of the **trunk** (a.k.a. the **Docutils +core**) is used for active -- but stable, fully tested, and reviewed +-- development. + +If we need to cut a bugfix release, we'll create a **maintenance branch** +based on the latest feature release. For example, when Docutils 0.5 is +released, this would be ``branches/docutils-0.5``, and any existing 0.4.x +maintenance branches may be retired. Maintenance branches will receive bug +fixes only; no new features will be allowed here. + +Obvious and uncontroversial bug fixes *with tests* can be checked in +directly to the core and to the maintenance branches. Don't forget to +add test cases! Many (but not all) bug fixes will be applicable both +to the core and to the maintenance branches; these should be applied +to both. No patches or dedicated branches are required for bug fixes, +but they may be used. It is up to the discretion of project +developers to decide which mechanism to use for each case. + +.. _feature branches: +.. _feature branch: + +Feature additions and API changes will be done in **feature +branches**. Feature branches will not be managed in any way. +Frequent small check-ins are encouraged here. Feature branches must be +discussed on the `docutils-develop mailing list`_ and reviewed before +being merged into the core. + +.. 
_docutils-develop mailing list: + https://lists.sourceforge.net/lists/listinfo/docutils-develop + + +Review Criteria +``````````````` + +Before a new feature, an API change, or a complex, disruptive, or +controversial bug fix can be checked in to the core or into a +maintenance branch, it must undergo review. These are the criteria: + +* The branch must be complete, and include full documentation and + tests. + +* There should ideally be one branch merge commit per feature or + change. In other words, each branch merge should represent a + coherent change set. + +* The code must be stable and uncontroversial. Moving targets and + features under debate are not ready to be merged. + +* The code must work. The test suite must complete with no failures. + See `Docutils Testing`_. + +The review process will ensure that at least one other set of eyeballs +& brains sees the code before it enters the core. In addition to the +above, the general `Check-ins`_ policy (below) also applies. + +.. _Docutils testing: testing.html + + +Check-ins +--------- + +Changes or additions to the Docutils core and maintenance branches +carry a commitment to the Docutils user community. Developers must be +prepared to fix and maintain any code they have committed. + +The Docutils core (``trunk/docutils`` directory) and maintenance +branches should always be kept in a stable state (usable and as +problem-free as possible). All changes to the Docutils core or +maintenance branches must be in `good shape`_, usable_, documented_, +tested_, and `reasonably complete`_. Starting with version 1.0, they must +also comply with the `backwards compatibility policy`_. + +* _`Good shape` means that the code is clean, readable, and free of + junk code (unused legacy code; by analogy to "junk DNA"). + +* _`Usable` means that the code does what it claims to do. An "XYZ + Writer" should produce reasonable XYZ output. + +* _`Documented`: The more complete the documentation the better. 
+ Modules & files must be at least minimally documented internally. + `Docutils Front-End Tools`_ should have a new section for any + front-end tool that is added. `Docutils Configuration Files`_ + should be modified with any settings/options defined. For any + non-trivial change, the HISTORY.txt_ file should be updated. + +* _`Tested` means that unit and/or functional tests, that catch all + bugs fixed and/or cover all new functionality, have been added to + the test suite. These tests must be checked by running the test + suite under all supported Python versions, and the entire test suite + must pass. See `Docutils Testing`_. + +* _`Reasonably complete` means that the code must handle all input. + Here "handle" means that no input can cause the code to fail (cause + an exception, or silently and incorrectly produce nothing). + "Reasonably complete" does not mean "finished" (no work left to be + done). For example, a writer must handle every standard element + from the Docutils document model; for unimplemented elements, it + must *at the very least* warn that "Output for element X is not yet + implemented in writer Y". + +If you really want to check code directly into the Docutils core, +you can, but you must ensure that it fulfills the above criteria +first. People will start to use it and they will expect it to work! +If there are any issues with your code, or if you only have time for +gradual development, you should put it on a branch or in the sandbox +first. It's easy to move code over to the Docutils core once it's +complete. + +It is the responsibility and obligation of all developers to keep the +Docutils core and maintenance branches stable. If a commit is made to +the core or maintenance branch which breaks any test, the solution is +simply to revert the change. This is not vindictive; it's practical. +We revert first, and discuss later. 
+ +Docutils will pursue an open and trusting policy for as long as +possible, and deal with any aberrations if (and hopefully not when) +they happen. We'd rather see a torrent of loose contributions than +just a trickle of perfect-as-they-stand changes. The occasional +mistake is easy to fix. That's what version control is for! + +.. _Docutils Front-End Tools: ../user/tools.html +.. _Docutils Configuration Files: ../user/config.html +.. _HISTORY.txt: ../../HISTORY.txt + + +.. _`Version Numbering`: + +Version Identification +====================== + +The state of development of the current Docutils codebase is stored in +two forms: the sequence `docutils.__version_info__`_ and the +`PEP 440`_ conformant text string `docutils.__version__`_. +See also the `Docutils Release Procedure`_ + +.. _Docutils Release Procedure: release.html#version-numbers + + +``docutils.__version_info__`` +----------------------------- + +``docutils.__version_info__`` is an instance of ``docutils.VersionInfo`` +based on collections.namedtuple_. It is modelled on `sys.version_info`_ +and has the following attributes: + +major : non-negative integer + **Major releases** (x.0, e.g. 1.0) will be rare, and will + represent major changes in API, functionality, or commitment. The + major number will be bumped to 1 when the project is + feature-complete, and may be incremented later if there is a major + change in the design or API. When Docutils reaches version 1.0, + the major APIs will be considered frozen. + For details, see the `backwards compatibility policy`_. + +minor : non-negative integer + Releases that change the minor number (x.y, e.g. 0.5) will be + **feature releases**; new features from the `Docutils core`_ will + be included. + +micro : non-negative integer + Releases that change the micro number (x.y.z, e.g. 0.4.1) will be + **bug-fix releases**. No new features will be introduced in these + releases; only bug fixes will be included. 
+ + The micro number is omitted from `docutils.__version__`_ when it + equals zero. + +_`releaselevel` : text string + The release level indicates the `development status`_ (or phase) + of the project's codebase: + + ============= ========== =============================================== + Release Level Label [#]_ Description + ============= ========== =============================================== + alpha ``a`` Reserved for use after major experimental + changes, to indicate an unstable codebase. + + beta ``b`` Indicates active development, between releases. + + candidate ``rc`` Release candidate: indicates that the + codebase is ready to release unless + significant bugs emerge. + + final Indicates an official project release. + ============= ========== =============================================== + + .. [#] The labels are used in the `docutils.__version__`_ pre-release + segment. + + .. _development status: + https://en.wikipedia.org/wiki/Software_release_life_cycle + +_`serial` : non-negative integer + The serial number is zero for final releases and incremented + whenever a new pre-release is begun. + +_`release` : boolean + True for official releases and pre-releases, False during + development. + +* One of *{major, minor, micro, serial}* is incremented after each + release, and the lower-order numbers are reset to 0. + +* The default state of the repository during active development is + release level = "beta", serial = 0, release = False. + +``docutils.__version_info__`` can be used to test for a minimally +required version, e.g. :: + + docutils.__version_info__ >= (0, 13) + +is True for all versions after ``"0.13"``. + +.. _collections.namedtuple: + https://docs.python.org/3/library/collections.html#collections.namedtuple +.. 
_sys.version_info: + https://docs.python.org/3/library/sys.html#sys.version_info + +``docutils.__version__`` +------------------------ + +The text string ``docutils.__version__`` is a human readable, +`PEP 440`_-conforming version specifier. For version comparison +operations, use `docutils.__version_info__`_. + +``docutils.__version__`` takes the following form:: + + "<major>.<minor>[.<micro>][<releaselevel>[<serial>]][.dev]" + <--- release segment ---><-- pre-release segment -><- development -> + +* The *pre-release segment* contains a label representing the + releaselevel_ ("a", "b", or "rc") and optionally a serial_ number + (omitted, if zero). + +* The *development segment* is ``".dev"`` during active development + (release_ == False) and omitted for official releases and pre-releases. + +Examples of ``docutils.__version__`` identifiers, over the course of +normal development (without branches), in ascending order: + +============================== ============================= +Release Level Version Identifier +============================== ============================= +final (release) 0.14 +beta (development) [#dev]_ 0.15b.dev +beta (release) [#skip]_ 0.15b +candidate 1 (dev.) 0.15rc1.dev +candidate 1 (release) 0.15rc1 +candidate 2 (dev.) [#skip]_ 0.15rc2.dev +candidate 2 (release) [#skip]_ 0.15rc2 +... +final (release) 0.15 +beta (development) [#dev]_ 0.16b.dev +============================== ============================= + +.. [#dev] Default active development state between releases. +.. [#skip] These steps may be skipped. + +.. _PEP 440: https://peps.python.org/pep-0440/ + +Policy History +-------------- + +* Prior to version 0.4, Docutils didn't have an official version + numbering policy, and micro releases contained both bug fixes and + new features. + +* An earlier version of this policy was adopted in October 2005, and + took effect with Docutils version 0.4. + +* This policy was updated in June 2017 for Docutils version 0.14.
See + `Feature Request #50`_ and the `discussion on docutils-devel`__ from + May 28 to June 20 2017. + + .. _Feature Request #50: + https://sourceforge.net/p/docutils/feature-requests/50/ + __ https://sourceforge.net/p/docutils/mailman/message/35903816/ + + +Backwards Compatibility Policy +============================== + +.. note:: The backwards compatibility policy outlined below is a stub. + +Docutils' backwards compatibility policy follows the rules for Python in +:PEP:`387`. + +* The scope of the public API is laid out at the start of the `backwards + compatibility rules`_. + +* The rules for `making incompatible changes`_ apply. + +A majority of projects depends on Docutils indirectly, via the Sphinx_ +document processor. + +* Sphinx developers should be given the chance to fix or work around a + DeprecationWarning_ in the Sphinx development version before a new + Docutils version is released. Otherwise, use a PendingDeprecationWarning_. + +Changes that may affect end-users (e.g. by requiring changes to the +configuration file or potentially breaking custom style sheets) should be +announced with a FutureWarning_. + +.. _backwards compatibility rules: + https://peps.python.org/pep-0387/#backwards-compatibility-rules +.. _making incompatible changes: + https://peps.python.org/pep-0387/#making-incompatible-changes +.. _Sphinx: https://www.sphinx-doc.org/ +.. _DeprecationWarning: + https://docs.python.org/3/library/exceptions.html#DeprecationWarning +.. _PendingDeprecationWarning: + https://docs.python.org/3/library/exceptions.html#PendingDeprecationWarning +.. _FutureWarning: + https://docs.python.org/3/library/exceptions.html#FutureWarning + + +Snapshots +========= + +Snapshot tarballs can be downloaded from the repository (see the "download +snapshot" button in the head of the code listing table). 
+ +* the `Docutils core`_, representing the current cutting-edge state of + development; + +* the `sandbox directory`_ with contributed projects and extensions from + `the Sandbox`_; + +.. * maintenance branches, for bug fixes; + + TODO: do we have active maintenance branches? + (the only branch looking like a maintenance branch is + https://sourceforge.net/p/docutils/code/HEAD/tree/branches/docutils-0.4) + +* `development branches`_, representing ongoing development efforts to bring + new features into Docutils. + +.. _Docutils core: + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils +.. _development branches: + https://sourceforge.net/p/docutils/code/HEAD/tree/branches/ + + +Setting Up For Docutils Development +=================================== + +When making changes to the code, testing_ is a must. The code should +be run to verify that it produces the expected results, and the entire +test suite should be run too. The modified Docutils code has to be +accessible to Python for the tests to have any meaning. +See `editable installs`_ for ways to keep the Docutils code +accessible during development. + +.. _testing: tested_ +.. _editable installs: repository.html#editable-installs + + +Mailing Lists +============= + +Developers are recommended to subscribe to all `Docutils mailing +lists`_. + +.. _Docutils mailing lists: ../user/mailing-lists.html + + +The Wiki +======== + +There is a development wiki at http://docutils.python-hosting.com/ as +a scratchpad for transient notes. Please use the repository for +permanent document storage. + +Extensions and Related Projects +=============================== + +The Sandbox +----------- + +The `sandbox directory`_ is a place to play around, to try out and +share ideas. It's a part of the Subversion repository but it isn't +distributed as part of Docutils releases. 
Feel free to check in code +to the sandbox; that way people can try it out but you won't have to +worry about it working 100% error-free, as is the goal of the Docutils +core. A project-specific subdirectory should be created for each new +project. Any developer who wants to play in the sandbox may do so, +but project directories are recommended over personal directories, +which discourage collaboration. It's OK to make a mess in the +sandbox! But please, play nice. + +Please update the `sandbox README`_ file with links and a brief +description of your work. + +In order to minimize the work necessary for others to install and try +out new, experimental components, the following sandbox directory +structure is recommended:: + + sandbox/ + project_name/ # For a collaborative project. + README.txt # Describe the requirements, purpose/goals, usage, + # and list the maintainers. + docs/ + ... + component.py # The component is a single module. + # *OR* (but *not* both) + component/ # The component is a package. + __init__.py # Contains the Reader/Writer class. + other1.py # Other modules and data files used + data.txt # by this component. + ... + test/ # Test suite. + ... + tools/ # For front ends etc. + ... + setup.py # Install the component code and tools/ files + # into the right places. + userid/ # For *temporary* personal space. + +Some sandbox projects are destined to move to the Docutils core once +completed. Others, such as add-ons to Docutils or applications of +Docutils, may graduate to become `parallel projects`_. + +.. _sandbox README: https://docutils.sourceforge.io/sandbox/README.html +.. _sandbox directory: + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/sandbox/ + + +.. _parallel project: + +Parallel Projects +----------------- + +Parallel projects contain useful code that is not central to the +functioning of Docutils. Examples are specialized add-ons or +plug-ins, and applications of Docutils. 
They use Docutils, but +Docutils does not require their presence to function. + +An official parallel project will have its own directory beside (or +parallel to) the main ``docutils`` directory in the Subversion +repository. It can have its own web page in the +docutils.sourceforge.io domain, its own file releases and +downloadable snapshots, and even a mailing list if that proves useful. +However, an official parallel project has implications: it is expected +to be maintained and continue to work with changes to the core +Docutils. + +A parallel project requires a project leader, who must commit to +coordinate and maintain the implementation: + +* Answer questions from users and developers. +* Review suggestions, bug reports, and patches. +* Monitor changes and ensure the quality of the code and + documentation. +* Coordinate with Docutils to ensure interoperability. +* Put together official project releases. + +Of course, related projects may be created independently of Docutils. +The advantage of a parallel project is that the SourceForge +environment and the developer and user communities are already +established. Core Docutils developers are available for consultation +and may contribute to the parallel project. It's easier to keep the +projects in sync when there are changes made to the core Docutils +code. + +Other related projects +---------------------- + +Many related but independent projects are listed in the Docutils +`link list`_. If you want your project to appear there, drop a note at +the Docutils-develop_ mailing list. + +.. _link list: https://docutils.sourceforge.io/docs/user/links.html +.. _docutils-develop: docs/user/mailing-lists.html#docutils-develop + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/pysource.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/pysource.txt new file mode 100644 index 00000000..e0fdc0bb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/pysource.txt @@ -0,0 +1,131 @@ +====================== + Python Source Reader +====================== +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +This document explores issues around extracting and processing +docstrings from Python modules. + +For definitive element hierarchy details, see the "Python Plaintext +Document Interface DTD" XML document type definition, pysource.dtd_ +(which modifies the generic docutils.dtd_). Descriptions below list +'DTD elements' (XML 'generic identifiers' or tag names) corresponding +to syntax constructs. + + +.. contents:: + + +Model +===== + +The Python Source Reader ("PySource") model that's evolving in my mind +goes something like this: + +1. Extract the docstring/namespace [#]_ tree from the module(s) and/or + package(s). + + .. [#] See `Docstring Extractor`_ below. + +2. Run the parser on each docstring in turn, producing a forest of + doctrees (per nodes.py). + +3. Join the docstring trees together into a single tree, running + transforms: + + - merge hyperlinks + - merge namespaces + - create various sections like "Module Attributes", "Functions", + "Classes", "Class Attributes", etc.; see pysource.dtd_ + - convert the above special sections to ordinary doctree nodes + +4. Run transforms on the combined doctree. Examples: resolving + cross-references/hyperlinks (including interpreted text on Python + identifiers); footnote auto-numbering; first field list -> + bibliographic elements. 
+ + (Or should step 4's transforms come before step 3?) + +5. Pass the resulting unified tree to the writer/builder. + +I've had trouble reconciling the roles of input parser and output +writer with the idea of modes ("readers" or "directors"). Does the +mode govern the transformation of the input, the output, or both? +Perhaps the mode should be split into two. + +For example, say the source of our input is a Python module. Our +"input mode" should be the "Python Source Reader". It discovers (from +``__docformat__``) that the input parser is "reStructuredText". If we +want HTML, we'll specify the "HTML" output formatter. But there's a +piece missing. What *kind* or *style* of HTML output do we want? +PyDoc-style, LibRefMan style, etc. (many people will want to specify +and control their own style). Is the output style specific to a +particular output format (XML, HTML, etc.)? Is the style specific to +the input mode? Or can/should they be independent? + +I envision interaction between the input parser, an "input mode" , and +the output formatter. The same intermediate data format would be used +between each of these, being transformed as it progresses. + + +Docstring Extractor +=================== + +We need code that scans a parsed Python module, and returns an ordered +tree containing the names, docstrings (including attribute and +additional docstrings), and additional info (in parentheses below) of +all of the following objects: + +- packages +- modules +- module attributes (+ values) +- classes (+ inheritance) +- class attributes (+ values) +- instance attributes (+ values) +- methods (+ formal parameters & defaults) +- functions (+ formal parameters & defaults) + +(Extract comments too? For example, comments at the start of a module +would be a good place for bibliographic field lists.) + +In order to evaluate interpreted text cross-references, namespaces for +each of the above will also be required. 
+ +See python-dev/docstring-develop thread "AST mining", started on +2001-08-14. + + +Interpreted Text +================ + +DTD elements: package, module, class, method, function, +module_attribute, class_attribute, instance_attribute, variable, +parameter, type, exception_class, warning_class. + +To classify identifiers explicitly, the role is given along with the +identifier in either prefix or suffix form:: + + Use :method:`Keeper.storedata` to store the object's data in + `Keeper.data`:instance_attribute:. + +The role may be one of 'package', 'module', 'class', 'method', +'function', 'module_attribute', 'class_attribute', +'instance_attribute', 'variable', 'parameter', 'type', +'exception_class', 'exception', 'warning_class', or 'warning'. Other +roles may be defined. + +.. _pysource.dtd: pysource.dtd +.. _docutils.dtd: ../ref/docutils.dtd + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/release.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/release.txt new file mode 100644 index 00000000..4d07186b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/release.txt @@ -0,0 +1,127 @@ +============================= + Docutils_ Release Procedure +============================= + +:Authors: David Goodger; Lea Wiemann; open to all Docutils developers +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + +Releasing (post 2020) +--------------------- + +* Announce the upcoming release on the docutils-develop list. + + Consider **feature freeze** and/or **check-in freeze**. + +* Update RELEASE-NOTES.txt: add a section ``Release <version>``. + + Consult HISTORY.txt for important changes. + +* Change HISTORY.txt title ``Changes Since <previous release>`` to ``Release <version>``.
+ +* Set new version with ``sandbox/infrastructure/set_version.sh <version>`` + + Use the version control system to check what ``set_version.sh`` changed. + + Run tests :: + + export PYTHONWARNINGS=default + python3 test/alltests.py + + or use tox. + + ``export PYTHONWARNINGS=default`` prints DeprecationWarnings in python3. + +* Generate universal wheel and source-distribution:: + + python3 setup.py sdist + python3 setup.py bdist_wheel --universal + +* Upload universal wheel and source to test.pypi:: + + python3 -m twine upload --repository-url https://test.pypi.org/legacy/ dist/* + + Test in venv :: + + python3 -m venv du3 ; cd du3 + export PYTHONPATH= ; . bin/activate + + python -m pip install --index-url https://test.pypi.org/simple/ --no-deps docutils + + cp -Lr ../docutils-code/docutils/test . + python test/alltests.py + + python -m pip uninstall docutils + deactivate ; cd .. ; rm -r du3 + +* Commit changes ... the changed version number. + +* tag 0.## (Note: only directory docutils is copied):: + + svn copy svn+ssh://grubert@svn.code.sf.net/p/docutils/code/trunk/docutils \ + svn+ssh://grubert@svn.code.sf.net/p/docutils/code/tags/docutils-0.## \ + -m "tagging release 0.##" + +* Update your source directory. +* Rebuild universal wheel and source-distribution :: + + python3 setup.py sdist + python3 setup.py bdist_wheel --universal + +* Now upload to pypi:: + + python3 -m twine upload dist/docutils-0.##* + +* Remove previous package from local cache:: + + find .cache/pip/wheels -name docutils\*whl -exec rm -v -i {} \; + +* and test:: + + python3 -m venv du3 ; cd du3 + export PYTHONPATH= ; . bin/activate + + pip install --no-deps docutils + cp -Lr ../docutils-code/docutils/test . + python test/alltests.py + + deactivate ; cd .. ; rm -r du3 + +* Notify the docutils-develop and docutils-users mailing lists. + +* upload source and generated html to sf-htdocs/0.## :: + + mkdir tmp1 + cd tmp1 + tar xzvf ../dist/docutils-0.##.tar.gz + cd docutils-0.##/ + tools/buildhtml.py . + find .
-name \*.pyc -exec rm -v {} \; + find . -name __pycache__ -exec rmdir -v {} \; + rm -r docutils.egg-info + rsync -e ssh -r -t ./ web.sourceforge.net:/home/project-web/docutils/htdocs/0.## + +* Check web/index.txt for necessary corrections. +* Run sandbox/infrastructure/docutils-update.local to update web-content. +* Release to sourceforge. + + - Upload tar.gz and 0.16 release notes to sourceforge. + - Select docutils-0.16.tar.gz as default for all OS. + +* set_version 0.#.#+1b.dev +* test with py3 +* docutils/HISTORY.txt: add title "Changes Since 0.##" +* run sandbox/infrastructure/docutils-update.local + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/repository.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/repository.txt new file mode 100644 index 00000000..7516959b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/repository.txt @@ -0,0 +1,313 @@ +===================================== + The Docutils_ Version Repository +===================================== + +:Author: Lea Wiemann, Docutils developers +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + +.. admonition:: Quick Instructions + + To get a checkout of the Docutils source tree (with the + sandboxes) with SVN_, type :: + + svn checkout https://svn.code.sf.net/p/docutils/code/trunk docutils-code + + Users of Git_ can clone a mirror of the docutils repository with :: + + git clone git://repo.or.cz/docutils.git + + If you are going to commit changes to the repository, please read + the **whole document**, especially the section "`Information for + Developers`_"! 
+ +Docutils uses a Subversion_ (SVN) repository located at +``docutils.svn.sourceforge.net``. + +While Unix and Mac OS X users will probably prefer the standard +Subversion command line interface, Windows users may want to try +TortoiseSVN_, a convenient explorer extension. The instructions apply +analogously. + +There is a Git_ mirror at http://repo.or.cz/docutils.git providing +`web access`_ and the base for `creating a local Git clone`_. +[#github-mirrors]_ + +For the project policy on repository use (check-in requirements, +branching, etc.), please see the `Docutils Project Policies`__. + +__ policies.html#subversion-repository + +.. _SVN: +.. _Subversion: https://subversion.apache.org/ +.. _TortoiseSVN: https://tortoisesvn.net/ +.. _SourceForge.net: https://sourceforge.net/ +.. _Git: http://git-scm.com/ + +.. contents:: + + +Accessing the Repository +======================== + +Web Access +---------- + +The repository can be browsed and examined via the web at +https://sourceforge.net/p/docutils/code. + +Alternatively, use the web interface at http://repo.or.cz/docutils.git. +[#github-mirrors]_ + +.. [#github-mirrors] There are also 3rd-party mirrors and forks at + GitHub, some of them orphaned. At the time of this writing (2021-11-03), + https://github.com/live-clones/docutils/tree/master/docutils + provides an hourly updated clone. + +Repository Access Methods +------------------------- + +To get a checkout, first determine the root of the repository depending +on your preferred protocol: + +anonymous access: (read only) + Subversion_: ``https://svn.code.sf.net/p/docutils/code`` + + Git_: ``git://repo.or.cz/docutils.git`` + +`developer access`_: (read and write) + ``svn+ssh://<user>@svn.code.sf.net/p/docutils/code`` + +Checking Out the Repository +--------------------------- + +..
_creating a local Git clone: + +Git_ users can clone a mirror of the docutils repository with :: + + git clone git://repo.or.cz/docutils.git + +and proceed according to the `Git documentation`_. +Developer access (read and write) is possible with `git svn`_. + +.. _Git documentation: https://git.wiki.kernel.org/index.php/GitDocumentation +.. _git svn: https://git.wiki.kernel.org/index.php/Git-svn + +Subversion_ users can use the following commands +(substitute your preferred repository root for ROOT): + +* To check out only the current main source tree of Docutils, type :: + + svn checkout ROOT/trunk/docutils + +* To check out everything (main tree, sandboxes, web site, and parallel + projects), type :: + + svn checkout ROOT/trunk docutils + + This will create a working copy of the whole trunk in a new directory + called ``docutils``. + +Note that you probably do *not* want to check out the ROOT itself +(without "/trunk"), because then you'd end up fetching the whole +Docutils tree for every branch and tag over and over again. + +To update your working copy later on, ``cd`` into the working copy and +type :: + + svn update + +Switching the Repository Root +----------------------------- + +If you changed your mind and want to use a different repository root, +``cd`` into your working copy and type:: + + svn switch --relocate OLDROOT NEWROOT + + +Editable installs +================= + +There are several ways to ensure that edits to the Docutils code are +picked up by Python. +We'll assume that the Docutils "trunk" is checked out under the +``~/projects/`` directory. + +1. Do an `editable install`__ with pip_:: + + python3 -m pip install -e ~/projects/docutils/docutils + + __ https://pip.pypa.io/en/stable/cli/pip_install/#editable-installs + +2. Install in `development mode`__ with setuptools_. + + __ https://setuptools.pypa.io/en/latest/userguide/development_mode.html + #development-mode + + .. _install manually: + +3. Install "manually". 
+ + Ensure that the "docutils" package is in ``sys.path`` by + one of the following actions: + + * Set the ``PYTHONPATH`` environment variable so that Python + picks up your local working copy of the code. + + For the bash shell, add this to your ``~/.profile``:: + + PYTHONPATH=$HOME/projects/docutils/docutils + export PYTHONPATH + + The first line points to the directory containing the ``docutils`` + package. The second line exports the environment variable. + + * Create a symlink to the docutils package directory somewhere in the + module search path (``sys.path``), e.g., :: + + ln -s ~/projects/docutils/docutils \ + /usr/local/lib/python3.9/dist-packages/ + + * Use a `path configuration file`__. + + __ https://docs.python.org/library/site.html + + Optionally, add some or all `front-end tools`_ + to the binary search path, e.g., + add the ``tools`` directory to the ``PATH`` variable:: + + PATH=$PATH:$HOME/projects/docutils/docutils/tools + export PATH + + or link individual front-end tools to a suitable place + in the binary path:: + + ln -s ~/projects/docutils/docutils/tools/docutils-cli.py \ + /usr/local/bin/docutils + +4. Reinstall Docutils after any change:: + + python3 setup.py install + + .. CAUTION:: + + This method is **not** recommended for day-to-day development; + it's too easy to forget. Confusion inevitably ensues. + + If you install Docutils this way, Python will always pick up the + last-installed copy of the code. If you ever forget to + reinstall the "docutils" package, Python won't see your latest + changes. + +A useful addition to the ``docutils`` top-level directory in branches +and alternate copies of the code is a ``set-PATHS`` file +containing the following lines:: + + # source this file + export PYTHONPATH=$PWD:$PWD + export PATH=$PWD/tools:$PATH + +Open a shell for this branch, ``cd`` to the ``docutils`` top-level +directory, and "source" this file. For example, using the bash +shell:: + + $ cd some-branch/docutils + $ . set-PATHS + +.. 
_pip: https://pypi.org/project/pip/ +.. _setuptools: https://pypi.org/project/setuptools/ +.. _front-end tools: ../user/tools.html + + +.. _developer access: + +Information for Developers +========================== + +If you would like to have write access to the repository, register +with SourceForge.net_ and send your SourceForge.net +user names to docutils-develop@lists.sourceforge.net. +(Note that there may be a delay of several hours until you can commit +changes to the repository.) + +SourceForge SVN access is documented `here`__. + +__ https://sourceforge.net/p/forge/documentation/svn/ + + +Ensure any changes comply with the `Docutils Project Policies`_ +before `checking in`_. + +.. _Docutils Project Policies: policies.html +.. _checking in: policies.html#check-ins + + +Setting Up Your Subversion Client For Development +------------------------------------------------- + +Before committing changes to the repository, please ensure that the +following lines are contained (and uncommented) in your local +~/.subversion/config file, so that new files are added with the +correct properties set:: + + [miscellany] + # For your convenience: + global-ignores = ... *.pyc ... 
+ # For correct properties: + enable-auto-props = yes + + [auto-props] + *.py = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.txt = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.html = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.xml = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.tex = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.css = svn:eol-style=native;svn:keywords=Author Date Id Revision + *.patch = svn:eol-style=native + *.sh = svn:eol-style=native;svn:executable;svn:keywords=Author Date Id Revision + *.png = svn:mime-type=image/png + *.jpg = svn:mime-type=image/jpeg + *.gif = svn:mime-type=image/gif + + +Repository Layout +================= + +The following tree shows the repository layout:: + + docutils/ + |-- branches/ + | |-- branch1/ + | | |-- docutils/ + | | |-- sandbox/ + | | `-- web/ + | `-- branch2/ + | |-- docutils/ + | |-- sandbox/ + | `-- web/ + |-- tags/ + | |-- tag1/ + | | |-- docutils/ + | | |-- sandbox/ + | | `-- web/ + | `-- tag2/ + | |-- docutils/ + | |-- sandbox/ + | `-- web/ + `-- trunk/ + |-- docutils/ + |-- sandbox/ + `-- web/ + +The main source tree lives at ``docutils/trunk/docutils/``, next to +the sandboxes (``docutils/trunk/sandbox/``) and the web site files +(``docutils/trunk/web/``). + +``docutils/branches/`` and ``docutils/tags/`` contain (shallow) copies +of either the whole trunk or only the main source tree +(``docutils/trunk/docutils``). 
diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/alternatives.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/alternatives.txt new file mode 100644 index 00000000..a9735ea9 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/alternatives.txt @@ -0,0 +1,3211 @@ +================================================== + A Record of reStructuredText Syntax Alternatives +================================================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +The following are ideas, alternatives, and justifications that were +considered for reStructuredText syntax, which did not originate with +Setext_ or StructuredText_. For an analysis of constructs which *did* +originate with StructuredText or Setext, please see `Problems With +StructuredText`_. See the `reStructuredText Markup Specification`_ +for full details of the established syntax. + +The ideas are divided into sections: + +* Implemented_: already done. The issues and alternatives are + recorded here for posterity. + +* `Not Implemented`_: these ideas won't be implemented. + +* Tabled_: these ideas should be revisited in the future. + +* `To Do`_: these ideas should be implemented. They're just waiting + for a champion to resolve issues and get them done. + +* `... Or Not To Do?`_: possible but questionable. These probably + won't be implemented, but you never know. + +.. _Setext: https://docutils.sourceforge.io/mirror/setext.html +.. _StructuredText: https://zopestructuredtext.readthedocs.org/ +.. _Problems with StructuredText: problems.html +.. _reStructuredText Markup Specification: + ../../ref/rst/restructuredtext.html + + +.. 
contents:: + +------------- + Implemented +------------- + +Field Lists +=========== + +Prior to the syntax for field lists being finalized, several +alternatives were proposed. + +1. Unadorned RFC822_ everywhere:: + + Author: Me + Version: 1 + + Advantages: clean, precedent (RFC822-compliant). Disadvantage: + ambiguous (these paragraphs are a prime example). + + Conclusion: rejected. + +2. Special case: use unadorned RFC822_ for the very first or very last + text block of a document:: + + """ + Author: Me + Version: 1 + + The rest of the document... + """ + + Advantages: clean, precedent (RFC822-compliant). Disadvantages: + special case, flat (unnested) field lists only, still ambiguous:: + + """ + Usage: cmdname [options] arg1 arg2 ... + + We obviously *don't* want the line above to be interpreted as a + field list item. Or do we? + """ + + Conclusion: rejected for the general case, accepted for specific + contexts (PEPs, email). + +3. Use a directive:: + + .. fields:: + + Author: Me + Version: 1 + + Advantages: explicit and unambiguous, RFC822-compliant. + Disadvantage: cumbersome. + + Conclusion: rejected for the general case (but such a directive + could certainly be written). + +4. Use Javadoc-style:: + + @Author: Me + @Version: 1 + @param a: integer + + Advantages: unambiguous, precedent, flexible. Disadvantages: + non-intuitive, ugly, not RFC822-compliant. + + Conclusion: rejected. + +5. Use leading colons:: + + :Author: Me + :Version: 1 + + Advantages: unambiguous, obvious (*almost* RFC822-compliant), + flexible, perhaps even elegant. Disadvantages: no precedent, not + quite RFC822-compliant. + + Conclusion: accepted! + +6. Use double colons:: + + Author:: Me + Version:: 1 + + Advantages: unambiguous, obvious? (*almost* RFC822-compliant), + flexible, similar to syntax already used for literal blocks and + directives. Disadvantages: no precedent, not quite + RFC822-compliant, similar to syntax already used for literal blocks + and directives. 
+ + Conclusion: rejected because of the syntax similarity & conflicts. + +Why is RFC822 compliance important? It's a universal Internet +standard, and super obvious. Also, I'd like to support the PEP format +(ulterior motive: get PEPs to use reStructuredText as their standard). +But it *would* be easy to get used to an alternative (easy even to +convert PEPs; probably harder to convert python-deviants ;-). + +Unfortunately, without well-defined context (such as in email headers: +RFC822 only applies before any blank lines), the RFC822 format is +ambiguous. It is very common in ordinary text. To implement field +lists unambiguously, we need explicit syntax. + +The following question was posed in a footnote: + + Should "bibliographic field lists" be defined at the parser level, + or at the DPS transformation level? In other words, are they + reStructuredText-specific, or would they also be applicable to + another (many/every other?) syntax? + +The answer is that bibliographic fields are a +reStructuredText-specific markup convention. Other syntaxes may +implement the bibliographic elements explicitly. For example, there +would be no need for such a transformation for an XML-based markup +syntax. + +.. _RFC822: https://www.rfc-editor.org/rfc/rfc822.txt + + +Interpreted Text "Roles" +======================== + +The original purpose of interpreted text was as a mechanism for +descriptive markup, to describe the nature or role of a word or +phrase. For example, in XML we could say "<function>len</function>" +to mark up "len" as a function. It is envisaged that within Python +docstrings (inline documentation in Python module source files, the +primary market for reStructuredText) the role of a piece of +interpreted text can be inferred implicitly from the context of the +docstring within the program source. For other applications, however, +the role may have to be indicated explicitly. + +Interpreted text is enclosed in single backquotes (`). + +1. 
Initially, it was proposed that an explicit role could be indicated + as a word or phrase within the enclosing backquotes: + + - As a prefix, separated by a colon and whitespace:: + + `role: interpreted text` + + - As a suffix, separated by whitespace and a colon:: + + `interpreted text :role` + + There are problems with the initial approach: + + - There could be ambiguity with interpreted text containing colons. + For example, an index entry of "Mission: Impossible" would + require a backslash-escaped colon. + + - The explicit role is descriptive markup, not content, and will + not be visible in the processed output. Putting it inside the + backquotes doesn't feel right; the *role* isn't being quoted. + +2. Tony Ibbs suggested that the role be placed outside the + backquotes:: + + role:`prefix` or `suffix`:role + + This removes the embedded-colons ambiguity, but limits the role + identifier to be a single word (whitespace would be illegal). + Since roles are not meant to be visible after processing, the lack + of whitespace support is not important. + + The suggested syntax remains ambiguous with respect to ratios and + some writing styles. For example, suppose there is a "signal" + identifier, and we write:: + + ...calculate the `signal`:noise ratio. + + "noise" looks like a role. + +3. As an improvement on #2, we can bracket the role with colons:: + + :role:`prefix` or `suffix`:role: + + This syntax is similar to that of field lists, which is fine since + both are doing similar things: describing. + + This is the syntax chosen for reStructuredText. + +4. Another alternative is two colons instead of one:: + + role::`prefix` or `suffix`::role + + But this is used for analogies ("A:B::C:D": "A is to B as C is to + D"). + + Both alternative #2 and #4 lack delimiters on both sides of the + role, making it difficult to parse (by the reader). + +5. 
Some kind of bracketing could be used: + + - Parentheses:: + + (role)`prefix` or `suffix`(role) + + - Braces:: + + {role}`prefix` or `suffix`{role} + + - Square brackets:: + + [role]`prefix` or `suffix`[role] + + - Angle brackets:: + + <role>`prefix` or `suffix`<role> + + (The overlap of \*ML tags with angle brackets would be too + confusing and precludes their use.) + +Syntax #3 was chosen for reStructuredText. + + +Comments +======== + +A problem with comments (actually, with all indented constructs) is +that they cannot be followed by an indented block -- a block quote -- +without swallowing it up. + +I thought that perhaps comments should be one-liners only. But would +this mean that footnotes, hyperlink targets, and directives must then +also be one-liners? Not a good solution. + +Tony Ibbs suggested a "comment" directive. I added that we could +limit a comment to a single text block, and that a "multi-block +comment" could use "comment-start" and "comment-end" directives. This +would remove the indentation incompatibility. A "comment" directive +automatically suggests "footnote" and (hyperlink) "target" directives +as well. This could go on forever! Bad choice. + +Garth Kidd suggested that an "empty comment", a ".." explicit markup +start with nothing on the first line (except possibly whitespace) and +a blank line immediately following, could serve as an "unindent". An +empty comment does **not** swallow up indented blocks following it, +so block quotes are safe. "A tiny but practical wart." Accepted. + + +Anonymous Hyperlinks +==================== + +Alan Jaffray came up with this idea, along with the following syntax:: + + Search the `Python DOC-SIG mailing list archives`{}_. + + .. _: https://mail.python.org/pipermail/doc-sig/ + +The idea is sound and useful. I suggested a "double underscore" +syntax:: + + Search the `Python DOC-SIG mailing list archives`__. + + .. __: https://mail.python.org/pipermail/doc-sig/ + +But perhaps single underscores are okay? 
The syntax looks better, but +the hyperlink itself doesn't explicitly say "anonymous":: + + Search the `Python DOC-SIG mailing list archives`_. + + .. _: https://mail.python.org/pipermail/doc-sig/ + +Mixing anonymous and named hyperlinks becomes confusing. The order of +targets is not significant for named hyperlinks, but it is for +anonymous hyperlinks:: + + Hyperlinks: anonymous_, named_, and another anonymous_. + + .. _named: named + .. _: anonymous1 + .. _: anonymous2 + +Without the extra syntax of double underscores, determining which +hyperlink references are anonymous may be difficult. We'd have to +check which references don't have corresponding targets, and match +those up with anonymous targets. Keeping to a simple consistent +ordering (as with auto-numbered footnotes) seems simplest. + +reStructuredText will use the explicit double-underscore syntax for +anonymous hyperlinks. An alternative (see `Reworking Explicit Markup +(Round 1)`_ below) for the somewhat awkward ".. __:" syntax is "__":: + + An anonymous__ reference. + + __ http://anonymous + + +Reworking Explicit Markup (Round 1) +=================================== + +Alan Jaffray came up with the idea of `anonymous hyperlinks`_, added +to reStructuredText. Subsequently it was asserted that hyperlinks +(especially anonymous hyperlinks) would play an increasingly important +role in reStructuredText documents, and therefore they require a +simpler and more concise syntax. This prompted a review of the +current and proposed explicit markup syntaxes with regards to +improving usability. + +1. Original syntax:: + + .. _blah: internal hyperlink target + .. _blah: http://somewhere external hyperlink target + .. _blah: blahblah_ indirect hyperlink target + .. __: anonymous internal target + .. __: http://somewhere anonymous external target + .. __: blahblah_ anonymous indirect target + .. [blah] http://somewhere footnote + .. blah:: http://somewhere directive + .. blah: http://somewhere comment + + .. 
Note:: + + The comment text was intentionally made to look like a hyperlink + target. + + Origins: + + * Except for the colon (a delimiter necessary to allow for + phrase-links), hyperlink target ``.. _blah:`` comes from Setext. + * Comment syntax from Setext. + * Footnote syntax from StructuredText ("named links"). + * Directives and anonymous hyperlinks original to reStructuredText. + + Advantages: + + + Consistent explicit markup indicator: "..". + + Consistent hyperlink syntax: ".. _" & ":". + + Disadvantages: + + - Anonymous target markup is awkward: ".. __:". + - The explicit markup indicator ("..") is excessively overloaded? + - Comment text is limited (can't look like a footnote, hyperlink, + or directive). But this is probably not important. + +2. Alan Jaffray's proposed syntax #1:: + + __ _blah internal hyperlink target + __ blah: http://somewhere external hyperlink target + __ blah: blahblah_ indirect hyperlink target + __ anonymous internal target + __ http://somewhere anonymous external target + __ blahblah_ anonymous indirect target + __ [blah] http://somewhere footnote + .. blah:: http://somewhere directive + .. blah: http://somewhere comment + + The hyperlink-connoted underscores have become first-level syntax. + + Advantages: + + + Anonymous targets are simpler. + + All hyperlink targets are one character shorter. + + Disadvantages: + + - Inconsistent internal hyperlink targets. Unlike all other named + hyperlink targets, there's no colon. There's an extra leading + underscore, but we can't drop it because without it, "blah" looks + like a relative URI. Unless we restore the colon:: + + __ blah: internal hyperlink target + + - Obtrusive markup? + +3. Alan Jaffray's proposed syntax #2:: + + .. _blah internal hyperlink target + .. blah: http://somewhere external hyperlink target + .. blah: blahblah_ indirect hyperlink target + .. anonymous internal target + .. http://somewhere anonymous external target + .. blahblah_ anonymous indirect target + .. 
[blah] http://somewhere footnote + !! blah: http://somewhere directive + ## blah: http://somewhere comment + + Leading underscores have been (almost) replaced by "..", while + comments and directives have gained their own syntax. + + Advantages: + + + Anonymous hyperlinks are simpler. + + Unique syntax for comments. Connotation of "comment" from + some programming languages (including our favorite). + + Unique syntax for directives. Connotation of "action!". + + Disadvantages: + + - Inconsistent internal hyperlink targets. Again, unlike all other + named hyperlink targets, there's no colon. There's a leading + underscore, matching the trailing underscores of references, + which no other hyperlink targets have. We can't drop that one + leading underscore though: without it, "blah" looks like a + relative URI. Again, unless we restore the colon:: + + .. blah: internal hyperlink target + + - All (except for internal) hyperlink targets lack their leading + underscores, losing the "hyperlink" connotation. + + - Obtrusive syntax for comments. Alternatives:: + + ;; blah: http://somewhere + (also comment syntax in Lisp & others) + ,, blah: http://somewhere + ("comma comma": sounds like "comment"!) + + - Iffy syntax for directives. Alternatives? + +4. Tony Ibbs' proposed syntax:: + + .. _blah: internal hyperlink target + .. _blah: http://somewhere external hyperlink target + .. _blah: blahblah_ indirect hyperlink target + .. anonymous internal target + .. http://somewhere anonymous external target + .. blahblah_ anonymous indirect target + .. [blah] http://somewhere footnote + .. blah:: http://somewhere directive + .. blah: http://somewhere comment + + This is the same as the current syntax, except for anonymous + targets which drop their "__: ". + + Advantage: + + + Anonymous targets are simpler. + + Disadvantages: + + - Anonymous targets lack their leading underscores, losing the + "hyperlink" connotation. + - Anonymous targets are almost indistinguishable from comments. 
+ (Better to know "up front".) + +5. David Goodger's proposed syntax: Perhaps going back to one of + Alan's earlier suggestions might be the best solution. How about + simply adding "__ " as a synonym for ".. __: " in the original + syntax? These would become equivalent:: + + .. __: anonymous internal target + .. __: http://somewhere anonymous external target + .. __: blahblah_ anonymous indirect target + + __ anonymous internal target + __ http://somewhere anonymous external target + __ blahblah_ anonymous indirect target + +Alternative 5 has been adopted. + + +Backquotes in Phrase-Links +========================== + +[From a 2001-06-05 Doc-SIG post in reply to questions from Doug +Hellmann.] + +The first draft of the spec, posted to the Doc-SIG in November 2000, +used square brackets for phrase-links. I changed my mind because: + +1. In the first draft, I had already decided on single-backquotes for + inline literal text. + +2. However, I wanted to minimize the necessity for backslash escapes, + for example when quoting Python repr-equivalent syntax that uses + backquotes. + +3. The processing of identifiers (function/method/attribute/module + etc. names) into hyperlinks is a useful feature. PyDoc recognizes + identifiers heuristically, but it doesn't take much imagination to + come up with counter-examples where PyDoc's heuristics would result + in embarrassing failure. I wanted to do it deterministically, and + that called for syntax. I called this construct "interpreted + text". + +4. Leveraging off the ``*emphasis*/**strong**`` syntax led to the + idea of using double-backquotes as syntax. + +5. I worked out some rules for inline markup recognition. + +6. In combination with #5, double backquotes lent themselves to inline + literals, neatly satisfying #2, minimizing backslash escapes. In + fact, the spec says that no interpretation of any kind is done + within double-backquote inline literal text; backslashes do *no* + escaping within literal text. + +7. 
Single backquotes are then freed up for interpreted text. + +8. I already had square brackets required for footnote references. + +9. Since interpreted text will typically turn into hyperlinks, it was + a natural fit to use backquotes as the phrase-quoting syntax for + trailing-underscore hyperlinks. + +The original inspiration for the trailing underscore hyperlink syntax +was Setext. But for phrases Setext used a very cumbersome +``underscores_between_words_like_this_`` syntax. + +The underscores can be viewed as if they were right-pointing arrows: +``-->``. So ``hyperlink_`` points away from the reference, and +``.. _hyperlink:`` points toward the target. + + +Substitution Mechanism +====================== + +Substitutions arose out of a Doc-SIG thread begun on 2001-10-28 by +Alan Jaffray, "reStructuredText inline markup". It reminded me of a +missing piece of the reStructuredText puzzle, first referred to in my +contribution to "Documentation markup & processing / PEPs" (Doc-SIG +2001-06-21). + +Substitutions allow the power and flexibility of directives to be +shared by inline text. They are a way to allow arbitrarily complex +inline objects, while keeping the details out of the flow of text. +They are the equivalent of SGML/XML's named entities. For example, an +inline image (using reference syntax alternative 4d (vertical bars) +and definition alternative 3, the alternatives chosen for inclusion in +the spec):: + + The |biohazard| symbol must be used on containers used to dispose + of medical waste. + + .. |biohazard| image:: biohazard.png + [height=20 width=20] + +The ``|biohazard|`` substitution reference will be replaced in-line by +whatever the ``.. |biohazard|`` substitution definition generates (in +this case, an image). A substitution definition contains the +substitution text bracketed with vertical bars, followed by an +embedded inline-compatible directive, such as "image". A transform is +required to complete the substitution. 
+ +Syntax alternatives for the reference: + +1. Use the existing interpreted text syntax, with a predefined role + such as "sub":: + + The `biohazard`:sub: symbol... + + Advantages: existing syntax, explicit. Disadvantages: verbose, + obtrusive. + +2. Use a variant of the interpreted text syntax, with a new suffix + akin to the underscore in phrase-link references:: + + (a) `name`@ + (b) `name`# + (c) `name`& + (d) `name`/ + (e) `name`< + (f) `name`:: + (g) `name`: + + + Due to incompatibility with other constructs and ordinary text + usage, (f) and (g) are not possible. + +3. Use interpreted text syntax with a fixed internal format:: + + (a) `:name:` + (b) `name:` + (c) `name::` + (d) `::name::` + (e) `%name%` + (f) `#name#` + (g) `/name/` + (h) `&name&` + (i) `|name|` + (j) `[name]` + (k) `<name>` + (l) `&name;` + (m) `'name'` + + To avoid ML confusion (k) and (l) are definitely out. Square + brackets (j) won't work in the target (the substitution definition + would be indistinguishable from a footnote). + + The ```/name/``` syntax (g) is reminiscent of "s/find/sub" + substitution syntax in ed-like languages. However, it may have a + misleading association with regexps, and looks like an absolute + POSIX path. (i) is visually equivalent and lacking the + connotations. + + A disadvantage of all of these is that they limit interpreted text, + albeit only slightly. + +4. Use specialized syntax, something new:: + + (a) #name# + (b) @name@ + (c) /name/ + (d) |name| + (e) <<name>> + (f) //name// + (g) ||name|| + (h) ^name^ + (i) [[name]] + (j) ~name~ + (k) !name! + (l) =name= + (m) ?name? + (n) >name< + + "#" (a) and "@" (b) are obtrusive. "/" (c) without backquotes + looks just like a POSIX path; it is likely for such usage to appear + in text. + + "|" (d) and "^" (h) are feasible. + +5. Redefine the trailing underscore syntax. See definition syntax + alternative 4, below. + +Syntax alternatives for the definition: + +1. 
Use the existing directive syntax, with a predefined directive such + as "sub". It contains a further embedded directive resolving to an + inline-compatible object:: + + .. sub:: biohazard + .. image:: biohazard.png + [height=20 width=20] + + .. sub:: parrot + That bird wouldn't *voom* if you put 10,000,000 volts + through it! + + The advantages and disadvantages are the same as in inline + alternative 1. + +2. Use syntax as in #1, but with an embedded directive compressed:: + + .. sub:: biohazard image:: biohazard.png + [height=20 width=20] + + This is a bit better than alternative 1, but still too much. + +3. Use a variant of directive syntax, incorporating the substitution + text, obviating the need for a special "sub" directive name. If we + assume reference alternative 4d (vertical bars), the matching + definition would look like this:: + + .. |biohazard| image:: biohazard.png + [height=20 width=20] + +4. (Suggested by Alan Jaffray on Doc-SIG from 2001-11-06.) + + Instead of adding new syntax, redefine the trailing underscore + syntax to mean "substitution reference" instead of "hyperlink + reference". Alan's example:: + + I had lunch with Jonathan_ today. We talked about Zope_. + + .. _Jonathan: lj [user=jhl] + .. _Zope: https://www.zope.dev/ + + A problem with the proposed syntax is that URIs which look like + simple reference names (alphanum plus ".", "-", "_") would be + indistinguishable from substitution directive names. A more + consistent syntax would be:: + + I had lunch with Jonathan_ today. We talked about Zope_. + + .. _Jonathan: lj:: user=jhl + .. _Zope: https://www.zope.dev/ + + (``::`` after ``.. _Jonathan: lj``.) + + The "Zope" target is a simple external hyperlink, but the + "Jonathan" target contains a directive. Alan's proposal is that the + reference text be replaced by whatever the referenced directive + (the "directive target") produces. 
A directive reference becomes a + hyperlink reference if the contents of the directive target resolve + to a hyperlink. If the directive target resolves to an icon, the + reference is replaced by an inline icon. If the directive target + resolves to a hyperlink, the directive reference becomes a + hyperlink reference. + + This seems too indirect and complicated for easy comprehension. + + The reference in the text will sometimes become a link, sometimes + not. Sometimes the reference text will remain, sometimes not. We + don't know *at the reference*:: + + This is a `hyperlink reference`_; its text will remain. + This is an `inline icon`_; its text will disappear. + + That's a problem. + +The syntax that has been incorporated into the spec and parser is +reference alternative 4d with definition alternative 3:: + + The |biohazard| symbol... + + .. |biohazard| image:: biohazard.png + [height=20 width=20] + +We can also combine substitution references with hyperlink references, +by appending a "_" (named hyperlink reference) or "__" (anonymous +hyperlink reference) suffix to the substitution reference. This +allows us to click on an image-link:: + + The |biohazard|_ symbol... + + .. |biohazard| image:: biohazard.png + [height=20 width=20] + .. _biohazard: https://www.cdc.gov/ + +There have been several suggestions for the naming of these +constructs, originally called "substitution references" and +"substitutions". + +1. Candidate names for the reference construct: + + (a) substitution reference + (b) tagging reference + (c) inline directive reference + (d) directive reference + (e) indirect inline directive reference + (f) inline directive placeholder + (g) inline directive insertion reference + (h) directive insertion reference + (i) insertion reference + (j) directive macro reference + (k) macro reference + (l) substitution directive reference + +2. 
Candidate names for the definition construct: + + (a) substitution + (b) substitution directive + (c) tag + (d) tagged directive + (e) directive target + (f) inline directive + (g) inline directive definition + (h) referenced directive + (i) indirect directive + (j) indirect directive definition + (k) directive definition + (l) indirect inline directive + (m) named directive definition + (n) inline directive insertion definition + (o) directive insertion definition + (p) insertion definition + (q) insertion directive + (r) substitution definition + (s) directive macro definition + (t) macro definition + (u) substitution directive definition + (v) substitution definition + +"Inline directive reference" (1c) seems to be an appropriate term at +first, but the term "inline" is redundant in the case of the +reference. Its counterpart "inline directive definition" (2g) is +awkward, because the directive definition itself is not inline. + +"Directive reference" (1d) and "directive definition" (2k) are too +vague. "Directive definition" could be used to refer to any +directive, not just those used for inline substitutions. + +One meaning of the term "macro" (1k, 2s, 2t) is too +programming-language-specific. Also, macros are typically simple text +substitution mechanisms: the text is substituted first and evaluated +later. reStructuredText substitution definitions are evaluated in +place at parse time and substituted afterwards. + +"Insertion" (1h, 1i, 2n-2q) is almost right, but it implies that +something new is getting added rather than one construct being +replaced by another. + +Which brings us back to "substitution". The overall best names are +"substitution reference" (1a) and "substitution definition" (2v). A +long way to go to add one word! + + +Inline External Targets +======================= + +Currently reStructuredText has two hyperlink syntax variations: + +* Named hyperlinks:: + + This is a named reference_ of one word ("reference"). 
Here is + a `phrase reference`_. Phrase references may even cross `line + boundaries`_. + + .. _reference: https://www.example.org/reference/ + .. _phrase reference: https://www.example.org/phrase_reference/ + .. _line boundaries: https://www.example.org/line_boundaries/ + + + Advantages: + + - The plaintext is readable. + - Each target may be reused multiple times (e.g., just write + ``"reference_"`` again). + - No synchronized ordering of references and targets is necessary. + + + Disadvantages: + + - The reference text must be repeated as target names; could lead + to mistakes. + - The target URLs may be located far from the references, and hard + to find in the plaintext. + +* Anonymous hyperlinks (in current reStructuredText):: + + This is an anonymous reference__. Here is an anonymous + `phrase reference`__. Phrase references may even cross `line + boundaries`__. + + __ https://www.example.org/reference/ + __ https://www.example.org/phrase_reference/ + __ https://www.example.org/line_boundaries/ + + + Advantages: + + - The plaintext is readable. + - The reference text does not have to be repeated. + + + Disadvantages: + + - References and targets must be kept in sync. + - Targets cannot be reused. + - The target URLs may be located far from the references. + +For comparison and historical background, StructuredText also has two +syntaxes for hyperlinks: + +* First, ``"reference text":URL``:: + + This is a "reference":https://www.example.org/reference/ + of one word ("reference"). Here is a "phrase + reference":https://www.example.org/phrase_reference/. + +* Second, ``"reference text", https://example.org/absolute_URL``:: + + This is a "reference", https://www.example.org/reference/ + of one word ("reference"). Here is a "phrase reference", + https://www.example.org/phrase_reference/. + +Both syntaxes share advantages and disadvantages: + ++ Advantages: + + - The target is specified immediately adjacent to the reference. 
+ ++ Disadvantages: + + - Poor plaintext readability. + - Targets cannot be reused. + - Both syntaxes use double quotes, common in ordinary text. + - In the first syntax, the URL and the last word are stuck + together, exacerbating the line wrap problem. + - The second syntax is too magical; text could easily be written + that way by accident (although only absolute URLs are recognized + here, perhaps because of the potential for ambiguity). + +A new type of "inline external hyperlink" has been proposed. + +1. On 2002-06-28, Simon Budig proposed__ a new syntax for + reStructuredText hyperlinks:: + + This is a reference_(https://www.example.org/reference/) of one + word ("reference"). Here is a `phrase + reference`_(https://www.example.org/phrase_reference/). Are + these examples, (single-underscore), named? If so, `anonymous + references`__(https://www.example.org/anonymous/) using two + underscores would probably be preferable. + + __ https://mail.python.org/pipermail/doc-sig/2002-June/002648.html + + The syntax, advantages, and disadvantages are similar to those of + StructuredText. + + + Advantages: + + - The target is specified immediately adjacent to the reference. + + + Disadvantages: + + - Poor plaintext readability. + - Targets cannot be reused (unless named, but the semantics are + unclear). + + + Problems: + + - The ``"`ref`_(URL)"`` syntax forces the last word of the + reference text to be joined to the URL, making a potentially + very long word that can't be wrapped (URLs can be very long). + The reference and the URL should be separate. This is a + symptom of the following point: + + - The syntax produces a single compound construct made up of two + equally important parts, *with syntax in the middle*, *between* + the reference and the target. This is unprecedented in + reStructuredText. + + - The "inline hyperlink" text is *not* a named reference (there's + no lookup by name), so it shouldn't look like one. 
+ + - According to the IETF standards RFC 2396 and RFC 2732, + parentheses are legal URI characters and curly braces are legal + email characters, making their use prohibitively difficult. + + - The named/anonymous semantics are unclear. + +2. After an analysis__ of the syntax of (1) above, we came up with the + following compromise syntax:: + + This is an anonymous reference__ + __ of one word + ("reference"). Here is a `phrase reference`__ + __. `Named + references`_ _ use single + underscores. + + __ https://mail.python.org/pipermail/doc-sig/2002-July/002670.html + + The syntax builds on that of the existing "inline internal + targets": ``an _`inline internal target`.`` + + + Advantages: + + - The target is specified immediately adjacent to the reference, + improving maintainability: + + - References and targets are easily kept in sync. + - The reference text does not have to be repeated. + + - The construct is executed in two parts: references identical to + existing references, and targets that are new but not too big a + stretch from current syntax. + + - There's overwhelming precedent for quoting URLs with angle + brackets [#]_. + + + Disadvantages: + + - Poor plaintext readability. + - Lots of "line noise". + - Targets cannot be reused (unless named; see below). + + To alleviate the readability issue slightly, we could allow the + target to appear later, such as after the end of the sentence:: + + This is a named reference__ of one word ("reference"). + __ Here is a `phrase + reference`__. __ + + Problem: this could only work for one reference at a time + (reference/target pairs must be proximate [refA trgA refB trgB], + not interleaved [refA refB trgA trgB] or nested [refA refB trgB + trgA]). This variation is too problematic; references and inline + external targets will have to be kept immediately adjacent (see (3) + below). + + The ``"reference__ __"`` syntax is actually for "anonymous + inline external targets", emphasized by the double underscores. 
It + follows that single trailing and leading underscores would lead to + *implicitly named* inline external targets. This would allow the + reuse of targets by name. So after ``"reference_ _"``, + another ``"reference_"`` would point to the same target. + + .. [#] + From RFC 2396 (URI syntax): + + The angle-bracket "<" and ">" and double-quote (") + characters are excluded [from URIs] because they are often + used as the delimiters around URI in text documents and + protocol fields. + + Using <> angle brackets around each URI is especially + recommended as a delimiting style for URI that contain + whitespace. + + From RFC 822 (email headers): + + Angle brackets ("<" and ">") are generally used to indicate + the presence of a one machine-usable reference (e.g., + delimiting mailboxes), possibly including source-routing to + the machine. + +3. If it is best for references and inline external targets to be + immediately adjacent, then they might as well be integrated. + Here's an alternative syntax embedding the target URL in the + reference:: + + This is an anonymous `reference `__ of one word ("reference"). Here is a `phrase + reference `__. + + Advantages and disadvantages are similar to those in (2). + Readability is still an issue, but the syntax is a bit less + heavyweight (reduced line noise). Backquotes are required, even + for one-word references; the target URL is included within the + reference text, forcing a phrase context. + + We'll call this variant "embedded URIs". + + Problem: how to refer to a title like "HTML Anchors: " (which + ends with an HTML/SGML/XML tag)? We could either require more + syntax on the target (like ``"`reference text + __`__"``), or require the odd conflicting + title to be escaped (like ``"`HTML Anchors: \`__"``). The + latter seems preferable, and not too onerous. 
+ + Similarly to (2) above, a single trailing underscore would convert + the reference & inline external target from anonymous to implicitly + named, allowing reuse of targets by name. + + I think this is the least objectionable of the syntax alternatives. + +Other syntax variations have been proposed (by Brett Cannon and Benja +Fallenstein):: + + `phrase reference`->https://www.example.org + + `phrase reference`@https://www.example.org + + `phrase reference`__ ->https://www.example.org + + `phrase reference` [-> https://www.example.org] + + `phrase reference`__ [-> https://www.example.org] + + `phrase reference` _ + +None of these variations are clearly superior to #3 above. Some have +problems that exclude their use. + +With any kind of inline external target syntax it comes down to the +conflict between maintainability and plaintext readability. I don't +see a major problem with reStructuredText's maintainability, and I +don't want to sacrifice plaintext readability to "improve" it. + +The proponents of inline external targets want them for easily +maintainable web pages. The arguments go something like this: + +- Named hyperlinks are difficult to maintain because the reference + text is duplicated as the target name. + + To which I said, "So use anonymous hyperlinks." + +- Anonymous hyperlinks are difficult to maintain because the + references and targets have to be kept in sync. + + "So keep the targets close to the references, grouped after each + paragraph. Maintenance is trivial." + +- But targets grouped after paragraphs break the flow of text. + + "Surely less than URLs embedded in the text! And if the intent is + to produce web pages, not readable plaintext, then who cares about + the flow of text?" + +Many participants have voiced their objections to the proposed syntax: + + Garth Kidd: "I strongly prefer the current way of doing it. + Inline is spectactularly messy, IMHO." + + Tony Ibbs: "I vehemently agree... 
that the inline alternatives + being suggested look messy - there are/were good reasons they've + been taken out... I don't believe I would gain from the new + syntaxes." + + Paul Moore: "I agree as well. The proposed syntax is far too + punctuation-heavy, and any of the alternatives discussed are + ambiguous or too subtle." + +Others have voiced their support: + + fantasai: "I agree with Simon. In many cases, though certainly + not in all, I find parenthesizing the url in plain text flows + better than relegating it to a footnote." + + Ken Manheimer: "I'd like to weigh in requesting some kind of easy, + direct inline reference link." + +(Interesting that those *against* the proposal have been using +reStructuredText for a while, and those *for* the proposal are either +new to the list ["fantasai", background unknown] or longtime +StructuredText users [Ken Manheimer].) + +I was initially ambivalent/against the proposed "inline external +targets". I value reStructuredText's readability very highly, and +although the proposed syntax offers convenience, I don't know if the +convenience is worth the cost in ugliness. Does the proposed syntax +compromise readability too much, or should the choice be left up to +the author? Perhaps if the syntax is *allowed* but its use strongly +*discouraged*, for aesthetic/readability reasons? + +After a great deal of thought and much input from users, I've decided +that there are reasonable use cases for this construct. The +documentation should strongly caution against its use in most +situations, recommending independent block-level targets instead. +Syntax #3 above ("embedded URIs") will be used. + + +Doctree Representation of Transitions +===================================== + +(Although not reStructuredText-specific, this section fits best in +this document.) 
+ +Having added the "horizontal rule" construct to the `reStructuredText +Markup Specification`_, a decision had to be made as to how to reflect +the construct in the implementation of the document tree. Given this +source:: + + Document + ======== + + Paragraph 1 + + -------- + + Paragraph 2 + +The horizontal rule indicates a "transition" (in prose terms) or the +start of a new "division". Before implementation, the parsed document +tree would be:: + + +
+ + Document + <paragraph> + Paragraph 1 + -------- <--- error here + <paragraph> + Paragraph 2 + +There are several possibilities for the implementation: + +1. Implement horizontal rules as "divisions" or segments. A + "division" is a title-less, non-hierarchical section. The first + try at an implementation looked like this:: + + <document> + <section names="document"> + <title> + Document + <paragraph> + Paragraph 1 + <division> + <paragraph> + Paragraph 2 + + But the two paragraphs are really at the same level; they shouldn't + appear to be at different levels. There's really an invisible + "first division". The horizontal rule splits the document body + into two segments, which should be treated uniformly. + +2. Treating "divisions" uniformly brings us to the second + possibility:: + + <document> + <section names="document"> + <title> + Document + <division> + <paragraph> + Paragraph 1 + <division> + <paragraph> + Paragraph 2 + + With this change, documents and sections will directly contain + divisions and sections, but not body elements. Only divisions will + directly contain body elements. Even without a horizontal rule + anywhere, the body elements of a document or section would be + contained within a division element. This makes the document tree + deeper. This is similar to the way HTML_ treats document contents: + grouped within a ``<body>`` element. + +3. Implement them as "transitions", empty elements:: + + <document> + <section names="document"> + <title> + Document + <paragraph> + Paragraph 1 + <transition> + <paragraph> + Paragraph 2 + + A transition would be a "point element", not containing anything, + only identifying a point within the document structure. This keeps + the document tree flatter, but the idea of a "point element" like + "transition" smells bad. A transition isn't a thing itself, it's + the space between two divisions. However, transitions are a + practical solution. 
+ +Solution 3 was chosen for incorporation into the document tree model. + +.. _HTML: https://www.w3.org/MarkUp/ + + +Syntax for Line Blocks +====================== + +* An early idea: How about a literal-block-like prefix, perhaps + "``;;``"? (It is, after all, a *semi-literal* literal block, no?) + Example:: + + Take it away, Eric the Orchestra Leader! ;; + + A one, two, a one two three four + + Half a bee, philosophically, + must, *ipso facto*, half not be. + But half the bee has got to be, + *vis a vis* its entity. D'you see? + + But can a bee be said to be + or not to be an entire bee, + when half the bee is not a bee, + due to some ancient injury? + + Singing... + + Kinda lame. + +* Another idea: in an ordinary paragraph, if the first line ends with + a backslash (escaping the newline), interpret the entire paragraph + as a verse block? For example:: + + Add just one backslash\ + And this paragraph becomes + An awful haiku + + (Awful, and arguably invalid, since in Japanese the word "haiku" + contains three syllables not two.) + + This idea was superseded by the rules for escaped whitespace, useful + for `character-level inline markup`_. + +* In a `2004-02-22 docutils-develop message`__, Jarno Elonen proposed + a "plain list" syntax (and also provided a patch):: + + | John Doe + | President, SuperDuper Corp. + | jdoe@example.org + + __ https://thread.gmane.org/gmane.text.docutils.devel/1187 + + This syntax is very natural. However, these "plain lists" seem very + similar to line blocks, and I see so little intrinsic "list-ness" + that I'm loath to add a new object. I used the term "blurbs" to + remove the "list" connotation from the originally proposed name.
+ Perhaps line blocks could be refined to add the two properties they + currently lack: + + A) long lines wrap nicely + B) HTML output doesn't look like program code in non-CSS web + browsers + + (A) is an issue of all 3 aspects of Docutils: syntax (construct + behaviour), internal representation, and output. (B) is partly an + issue of internal representation but mostly of output. + +ReStructuredText will redefine line blocks with the "|"-quoting +syntax. The following is my current thinking. + + +Syntax +------ + +Perhaps line block syntax like this would do:: + + | M6: James Bond + | MIB: Mr. J. + | IMF: not decided yet, but probably one of the following: + | Ethan Hunt + | Jim Phelps + | Claire Phelps + | CIA: Lea Leiter + +Note that the "nested" list does not have nested syntax (the "|" are +not further indented); the leading whitespace would still be +significant somehow (more below). As for long lines in the input, +this could suffice:: + + | John Doe + | Founder, President, Chief Executive Officer, Cook, Bottle + Washer, and All-Round Great Guy + | SuperDuper Corp. + | jdoe@example.org + +The lack of "|" on the third line indicates that it's a continuation +of the second line, wrapped. + +I don't see much point in allowing arbitrary nested content. Multiple +paragraphs or bullet lists inside a "blurb" doesn't make sense to me. +Simple nested line blocks should suffice. + + +Internal Representation +----------------------- + +Line blocks are currently represented as text blobs as follows:: + + <!ELEMENT line_block %text.model;> + <!ATTLIST line_block + %basic.atts; + %fixedspace.att;> + +Instead, we could represent each line by a separate element:: + + <!ELEMENT line_block (line+)> + <!ATTLIST line_block %basic.atts;> + + <!ELEMENT line %text.model;> + <!ATTLIST line %basic.atts;> + +We'd keep the significance of the leading whitespace of each line +either by converting it to non-breaking spaces at output, or with a +per-line margin. 
Non-breaking spaces are simpler (for HTML, anyway) +but kludgey, and wouldn't support indented long lines that wrap. But +should inter-word whitespace (i.e., not leading whitespace) be +preserved? Currently it is preserved in line blocks. + +Representing a more complex line block may be tricky:: + + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + +Perhaps the representation could allow for nested line blocks:: + + <!ELEMENT line_block (line | line_block)+> + +With this model, leading whitespace would no longer be significant. +Instead, left margins are implied by the nesting. The example above +could be represented as follows:: + + <line_block> + <line> + But can a bee be said to be + <line_block> + <line> + or not to be an entire bee, + <line_block> + <line> + when half the bee is not a bee, + <line_block> + <line> + due to some ancient injury? + +I wasn't sure what to do about even more complex line blocks:: + + | Indented + | Not indented + | Indented a bit + | A bit more + | Only one space + +How should that be parsed and nested? Should the first line have +the same nesting level (== indentation in the output) as the fourth +line, or the same as the last line? Mark Nodine suggested that such +line blocks be parsed similarly to complexly-nested block quotes, +which seems reasonable. In the example above, this would result in +the nesting of first line matching the last line's nesting. In +other words, the nesting would be relative to neighboring lines +only. + + +Output +------ + +In HTML, line blocks are currently output as "<pre>" blocks, which +gives us significant whitespace and line breaks, but doesn't allow +long lines to wrap and causes monospaced output without stylesheets. +Instead, we could output "<div>" elements parallelling the +representation above, where each nested <div class="line_block"> would +have an increased left margin (specified in the stylesheet). 
+ +Jarno suggested the following HTML output:: + + <div class="line_block"> + <span class="line">First, top level line</span><br class="hidden"/> + <div class="line_block"><span class="hidden"> </span> + <span class="line">Second, once nested</span><br class="hidden"/> + <span class="line">Third, once nested</span><br class="hidden"/> + ... + </div> + ... + </div> + +The ``<br class="hidden" />`` and ``<span +class="hidden"> </span>`` are meant to support non-CSS and +non-graphical browsers. I understand the case for "br", but I'm not +so sure about hidden " ". I question how much effort should be +put toward supporting non-graphical and especially non-CSS browsers, +at least for html4css1.py output. + +Should the lines themselves be ``<span>`` or ``<div>``? I don't like +mixing inline and block-level elements. + + +Implementation Plan +------------------- + +We'll leave the old implementation in place (via the "line-block" +directive only) until all Writers have been updated to support the new +syntax & implementation. The "line-block" directive can then be +updated to use the new internal representation, and its documentation +will be updated to recommend the new syntax. + + +List-Driven Tables +================== + +The original idea came from Dylan Jay: + + ... to use a two level bulleted list with something to + indicate it should be rendered as a table ... + +It's an interesting idea. It could be implemented as a directive +which transforms a uniform two-level list into a table. Using a +directive would allow the author to explicitly set the table's +orientation (by column or by row), the presence of row headers, etc. + +Alternatives: + +1. (Implemented in Docutils 0.3.8). + + Bullet-list-tables might look like this:: + + .. list-table:: + + * - Treat + - Quantity + - Description + * - Albatross! + - 299 + - On a stick! + * - Crunchy Frog! + - 1499 + - If we took the bones out, it wouldn't be crunchy, + now would it? + * - Gannet Ripple!
+ - 199 + - On a stick! + + This list must be written in two levels. This wouldn't work:: + + .. list-table:: + + * Treat + * Albatross! + * Gannet! + * Crunchy Frog! + + * Quantity + * 299 + * 199 + * 1499 + + * Description + * On a stick! + * On a stick! + * If we took the bones out... + + The above is a single list of 12 items. The blank lines are not + significant to the markup. We'd have to explicitly specify how + many columns or rows to use, which isn't a good idea. + +2. Beni Cherniavsky suggested a field list alternative. It could look + like this:: + + .. field-list-table:: + :headrows: 1 + + - :treat: Treat + :quantity: Quantity + :descr: Description + + - :treat: Albatross! + :quantity: 299 + :descr: On a stick! + + - :treat: Crunchy Frog! + :quantity: 1499 + :descr: If we took the bones out, it wouldn't be + crunchy, now would it? + + Column order is determined from the order of fields in the first + row. Field order in all other rows is ignored. As a side-effect, + this allows trivial re-arrangement of columns. By using named + fields, it becomes possible to omit fields in some rows without + losing track of things, which is important for spans. + +3. An alternative to two-level bullet lists would be to use enumerated + lists for the table cells:: + + .. list-table:: + + * 1. Treat + 2. Quantity + 3. Description + * 1. Albatross! + 2. 299 + 3. On a stick! + * 1. Crunchy Frog! + 2. 1499 + 3. If we took the bones out, it wouldn't be crunchy, + now would it? + + That provides better correspondence between cells in the same + column than does bullet-list syntax, but not as good as field list + syntax. I think that were only field-list-tables available, a lot + of users would use the equivalent degenerate case:: + + .. field-list-table:: + - :1: Treat + :2: Quantity + :3: Description + ... + +4. Another natural variant is to allow a description list with field + lists as descriptions:: + + .. 
list-table:: + :headrows: 1 + + Treat + :quantity: Quantity + :descr: Description + Albatross! + :quantity: 299 + :descr: On a stick! + Crunchy Frog! + :quantity: 1499 + :descr: If we took the bones out, it wouldn't be + crunchy, now would it? + + This would make the whole first column a header column ("stub"). + It's limited to a single column and a single paragraph fitting on + one source line. Also it wouldn't allow for empty cells or row + spans in the first column. But these are limitations that we could + live with, like those of simple tables. + +The List-driven table feature could be done in many ways. Each user +will have their preferred usage. Perhaps a single "list-table" +directive could handle them all, depending on which options and +content are present. + +Issues: + +* How to indicate that there's 1 header row? Perhaps two lists? :: + + .. list-table:: + + + - Treat + - Quantity + - Description + + * - Albatross! + - 299 + - On a stick! + + This is probably too subtle though. Better would be a directive + option, like ``:headrows: 1``. An early suggestion for the header + row(s) was to use a directive option:: + + .. field-list-table:: + :header: + - :treat: Treat + :quantity: Quantity + :descr: Description + - :treat: Albatross! + :quantity: 299 + :descr: On a stick! + + But the table data is at two levels and looks inconsistent. + + In general, we cannot extract the header row from field lists' field + names because field names cannot contain everything one might put in + a table cell. A separate header row also allows shorter field names + and doesn't force one to rewrite the whole table when the header + text changes. But for simpler cases, we can offer a ":header: + fields" option, which does extract header cells from field names:: + + .. field-list-table:: + :header: fields + + - :Treat: Albatross! + :Quantity: 299 + :Description: On a stick! + +* How to indicate the column widths? A directive option? :: + + .. 
list-table:: + :widths: 15 10 35 + + Automatic defaults from the text used? + +* How to handle row and/or column spans? + + In a field list, column-spans can be indicated by specifying the + first and last fields, separated by space-dash-space or ellipsis:: + + - :foo - baz: quuux + - :foo ... baz: quuux + + Commas were proposed for column spans:: + + - :foo, bar: quux + + But non-adjacent columns become problematic. Should we report an + error, or duplicate the value into each span of adjacent columns (as + was suggested)? The latter suggestion is appealing but may be too + clever. Best perhaps to simply specify the two ends. + + It was suggested that comma syntax should be allowed, too, in order + to allow the user to avoid trouble when changing the column order. + But changing the column order of a table with spans is not trivial; + we shouldn't make it easier to mess up. + + One possible syntax for row-spans is to simply treat any row where a + field is missing as a row-span from the last row where it appeared. + Leaving a field empty would still be possible by writing a field + with empty content. But this is too implicit. + + Another way would be to require an explicit continuation marker + (``...``/``-"-``/``"``?) in all but the first row of a spanned + field. Empty comments could work (".."). If implemented, the same + marker could also be supported in simple tables, which lack + row-spanning abilities. + + Explicit markup like ":rowspan:" and ":colspan:" was also suggested. + + Sometimes in a table, the first header row contains spans. It may + be necessary to provide a way to specify the column field names + independently of data rows. A directive option would do it. + +* We could specify "column-wise" or "row-wise" ordering, with the same + markup structure. For example, with definition data:: + + .. list-table:: + :column-wise: + + Treat + - Albatross! + - Crunchy Frog! + Quantity + - 299 + - 1499 + Description + - On a stick! 
+ - If we took the bones out, it wouldn't be + crunchy, now would it? + +* A syntax for _`stubs in grid tables` is easy to imagine:: + + +------------------------++------------+----------+ + | Header row, column 1 || Header 2 | Header 3 | + +========================++============+==========+ + | body row 1, column 1 || column 2 | column 3 | + +------------------------++------------+----------+ + + Or this idea from Nick Moffitt:: + + +-----+---+---+ + | XOR # T | F | + +=====+===+===+ + | T # F | T | + +-----+---+---+ + | F # T | F | + +-----+---+---+ + + +Auto-Enumerated Lists +===================== + +Implemented 2005-03-24: combination of variation 1 & 2. + +The advantage of auto-numbered enumerated lists would be similar to +that of auto-numbered footnotes: lists could be written and rearranged +without having to manually renumber them. The disadvantages are also +the same: input and output wouldn't match exactly; the markup may be +ugly or confusing (depending on which alternative is chosen). + +1. Use the "#" symbol. Example:: + + #. Item 1. + #. Item 2. + #. Item 3. + + Advantages: simple, explicit. Disadvantage: enumeration sequence + cannot be specified (limited to arabic numerals); ugly. + +2. As a variation on #1, first initialize the enumeration sequence? + For example:: + + a) Item a. + #) Item b. + #) Item c. + + Advantages: simple, explicit, any enumeration sequence possible. + Disadvantages: ugly; perhaps confusing with mixed concrete/abstract + enumerators. + +3. Alternative suggested by Fred Bremmer, from experience with MoinMoin:: + + 1. Item 1. + 1. Item 2. + 1. Item 3. + + Advantages: enumeration sequence is explicit (could be multiple + "a." or "(I)" tokens). Disadvantages: perhaps confusing; otherwise + erroneous input (e.g., a duplicate item "1.") would pass silently, + either causing a problem later in the list (if no blank lines + between items) or creating two lists (with blanks). + + Take this input for example:: + + 1. Item 1. + + 1. 
Unintentional duplicate of item 1. + + 2. Item 2. + + Currently the parser will produce two lists, "1" and "1,2" (no + warnings, because of the presence of blank lines). Using Fred's + notation, the current behavior is "1,1,2 -> 1 1,2" (without blank + lines between items, it would be "1,1,2 -> 1 [WARNING] 1,2"). What + should the behavior be with auto-numbering? + + Fred has produced a patch__, whose initial behavior is as follows:: + + 1,1,1 -> 1,2,3 + 1,2,2 -> 1,2,3 + 3,3,3 -> 3,4,5 + 1,2,2,3 -> 1,2,3 [WARNING] 3 + 1,1,2 -> 1,2 [WARNING] 2 + + (After the "[WARNING]", the "3" would begin a new list.) + + I have mixed feelings about adding this functionality to the spec & + parser. It would certainly be useful to some users (myself + included; I often have to renumber lists). Perhaps it's too + clever, asking the parser to guess too much. What if you *do* want + three one-item lists in a row, each beginning with "1."? You'd + have to use empty comments to force breaks. Also, I question + whether "1,2,2 -> 1,2,3" is optimal behavior. + + In response, Fred came up with "a stricter and more explicit rule + [which] would be to only auto-number silently if *all* the + enumerators of a list were identical". In that case:: + + 1,1,1 -> 1,2,3 + 1,2,2 -> 1,2 [WARNING] 2 + 3,3,3 -> 3,4,5 + 1,2,2,3 -> 1,2 [WARNING] 2,3 + 1,1,2 -> 1,2 [WARNING] 2 + + Should any start-value be allowed ("3,3,3"), or should + auto-numbered lists be limited to begin with ordinal-1 ("1", "A", + "a", "I", or "i")? + + __ https://sourceforge.net/tracker/index.php?func=detail&aid=548802 + &group_id=38414&atid=422032 + +4. Alternative proposed by Tony Ibbs:: + + #1. First item. + #3. Aha - I edited this in later. + #2. Second item. + + The initial proposal required unique enumerators within a list, but + this limits the convenience of a feature of already limited + applicability and convenience. Not a useful requirement; dropped.
+ + Instead, simply prepend a "#" to a standard list enumerator to + indicate auto-enumeration. The numbers (or letters) of the + enumerators themselves are not significant, except: + + - as a sequence indicator (arabic, roman, alphabetic; upper/lower), + + - and perhaps as a start value (first list item). + + Advantages: explicit, any enumeration sequence possible. + Disadvantages: a bit ugly. + + +Adjacent citation references +============================ + +A special case for inline markup was proposed and implemented: +multiple citation references could be joined into one:: + + [cite1]_[cite2]_ instead of requiring [cite1]_ [cite2]_ + +However, this was rejected as an unwarranted exception to the rules +for inline markup. +(The main motivation for the proposal, grouping citations in the latex writer, +was implemented by recognising the second group in the example above and +transforming it into ``\cite{cite1,cite2}``.) + + +Inline markup recognition +========================= + +Implemented 2011-12-05 (version 0.9): +Extended `inline markup recognition rules`_. + +Non-ASCII whitespace, punctuation characters and "international" quotes are +allowed around inline markup (based on `Unicode categories`_). The rules for +ASCII characters were not changed. + +Rejected alternatives: + +a) Use `Unicode categories`_ for all chars (ASCII or not) + + +1 comprehensible, standards based, + -1 many "false positives" need escaping, + -1 not backwards compatible. + +b) full backwards compatibility + + :Pi: only before start-string + :Pf: only behind end-string + :Po: "conservative" sorting of other punctuation: + + :``.,;!?\\``: Close + :``¡¿``: Open + + +1 backwards compatible, + +1 logical extension of the existing rules, + -1 exception list for "other" punctuation needed, + -1 rules even more complicated, + -1 not clear how to sort "other" punctuation that is currently not + recognized, + -2 international quoting convention like + »German ›angular‹ quotes« not recognized. 
+ +.. _Inline markup recognition rules: + ../../ref/rst/restructuredtext.html#inline-markup-recognition-rules +.. _Unicode categories: + https://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values + + +----------------- + Not Implemented +----------------- + +Reworking Footnotes +=================== + +As a further wrinkle (see `Reworking Explicit Markup (Round 1)`_ +above), in the wee hours of 2002-02-28 I posted several ideas for +changes to footnote syntax: + + - Change footnote syntax from ``.. [1]`` to ``_[1]``? ... + - Differentiate (with new DTD elements) author-date "citations" + (``[GVR2002]``) from numbered footnotes? ... + - Render footnote references as superscripts without "[]"? ... + +These ideas are all related, and suggest changes in the +reStructuredText syntax as well as the docutils tree model. + +The footnote has been used for both true footnotes (asides expanding +on points or defining terms) and for citations (references to external +works). Rather than dealing with one amalgam construct, we could +separate the current footnote concept into strict footnotes and +citations. Citations could be interpreted and treated differently +from footnotes. Footnotes would be limited to numerical labels: +manual ("1") and auto-numbered (anonymous "#", named "#label"). + +The footnote is the only explicit markup construct (starts with ".. ") +that directly translates to a visible body element. I've always been +a little bit uncomfortable with the ".. " marker for footnotes because +of this; ".. " has a connotation of "special", but footnotes aren't +especially "special". Printed texts often put footnotes at the bottom +of the page where the reference occurs (thus "foot note"). Some HTML +designs would leave footnotes to be rendered the same positions where +they're defined. 
Other online and printed designs will gather +footnotes into a section near the end of the document, converting them +to "endnotes" (perhaps using a directive in our case); but this +"special processing" is not an intrinsic property of the footnote +itself, but a decision made by the document author or processing +system. + +Citations are almost invariably collected in a section at the end of a +document or section. Citations "disappear" from where they are +defined and are magically reinserted at some well-defined point. +There's more of a connection to the "special" connotation of the ".. " +syntax. The point at which the list of citations is inserted could be +defined manually by a directive (e.g., ".. citations::"), and/or have +default behavior (e.g., a section automatically inserted at the end of +the document) that might be influenced by options to the Writer. + +Syntax proposals: + ++ Footnotes: + + - Current syntax:: + + .. [1] Footnote 1 + .. [#] Auto-numbered footnote. + .. [#label] Auto-labeled footnote. + + - The syntax proposed in the original 2002-02-28 Doc-SIG post: + remove the ".. ", prefix a "_":: + + _[1] Footnote 1 + _[#] Auto-numbered footnote. + _[#label] Auto-labeled footnote. + + The leading underscore syntax (earlier dropped because + ``.. _[1]:`` was too verbose) is a useful reminder that footnotes + are hyperlink targets. + + - Minimal syntax: remove the ".. [" and "]", prefix a "_", and + suffix a ".":: + + _1. Footnote 1. + _#. Auto-numbered footnote. + _#label. Auto-labeled footnote. + + ``_1.``, ``_#.``, and ``_#label.`` are markers, + like list markers. + + Footnotes could be rendered something like this in HTML + + | 1. This is a footnote. The brackets could be dropped + | from the label, and a vertical bar could set them + | off from the rest of the document in the HTML. + + Two-way hyperlinks on the footnote marker ("1." above) would also + help to differentiate footnotes from enumerated lists. 
+ + If converted to endnotes (by a directive/transform), a horizontal + half-line might be used instead. Page-oriented output formats + would typically use the horizontal line for true footnotes. + ++ Footnote references: + + - Current syntax:: + + [1]_, [#]_, [#label]_ + + - Minimal syntax to match the minimal footnote syntax above:: + + 1_, #_, #label_ + + As a consequence, pure-numeric hyperlink references would not be + possible; they'd be interpreted as footnote references. + ++ Citation references: no change is proposed from the current footnote + reference syntax:: + + [GVR2001]_ + ++ Citations: + + - Current syntax (footnote syntax):: + + .. [GVR2001] Python Documentation; van Rossum, Drake, et al.; + https://www.python.org/doc/ + + - Possible new syntax:: + + _[GVR2001] Python Documentation; van Rossum, Drake, et al.; + https://www.python.org/doc/ + + _[DJG2002] + Docutils: Python Documentation Utilities project; Goodger + et al.; https://docutils.sourceforge.io/ + + Without the ".. " marker, subsequent lines would either have to + align as in one of the above, or we'd have to allow loose + alignment (I'd rather not):: + + _[GVR2001] Python Documentation; van Rossum, Drake, et al.; + https://www.python.org/doc/ + +I proposed adopting the "minimal" syntax for footnotes and footnote +references, and adding citations and citation references to +reStructuredText's repertoire. The current footnote syntax for +citations is better than the alternatives given. + +From a reply by Tony Ibbs on 2002-03-01: + + However, I think easier with examples, so let's create one:: + + Fans of Terry Pratchett are perhaps more likely to use + footnotes [1]_ in their own writings than other people + [2]_. Of course, in *general*, one only sees footnotes + in academic or technical writing - it's use in fiction + and letter writing is not normally considered good + style [4]_, particularly in emails (not a medium that + lends itself to footnotes). + + .. 
[1] That is, little bits of referenced text at the + bottom of the page. + .. [2] Because Terry himself does, of course [3]_. + .. [3] Although he has the distinction of being + *funny* when he does it, and his fans don't always + achieve that aim. + .. [4] Presumably because it detracts from linear + reading of the text - this is, of course, the point. + + and look at it with the second syntax proposal:: + + Fans of Terry Pratchett are perhaps more likely to use + footnotes [1]_ in their own writings than other people + [2]_. Of course, in *general*, one only sees footnotes + in academic or technical writing - it's use in fiction + and letter writing is not normally considered good + style [4]_, particularly in emails (not a medium that + lends itself to footnotes). + + _[1] That is, little bits of referenced text at the + bottom of the page. + _[2] Because Terry himself does, of course [3]_. + _[3] Although he has the distinction of being + *funny* when he does it, and his fans don't always + achieve that aim. + _[4] Presumably because it detracts from linear + reading of the text - this is, of course, the point. + + (I note here that if I have gotten the indentation of the + footnotes themselves correct, this is clearly not as nice. And if + the indentation should be to the left margin instead, I like that + even less). + + and the third (new) proposal:: + + Fans of Terry Pratchett are perhaps more likely to use + footnotes 1_ in their own writings than other people + 2_. Of course, in *general*, one only sees footnotes + in academic or technical writing - it's use in fiction + and letter writing is not normally considered good + style 4_, particularly in emails (not a medium that + lends itself to footnotes). + + _1. That is, little bits of referenced text at the + bottom of the page. + _2. Because Terry himself does, of course 3_. + _3. Although he has the distinction of being + *funny* when he does it, and his fans don't always + achieve that aim. + _4. 
Presumably because it detracts from linear + reading of the text - this is, of course, the point. + + I think I don't, in practice, mind the targets too much (the use + of a dot after the number helps a lot here), but I do have a + problem with the body text, in that I don't naturally separate out + the footnotes as different than the rest of the text - instead I + keep wondering why there are numbers interspered in the text. The + use of brackets around the numbers ([ and ]) made me somehow parse + the footnote references as "odd" - i.e., not part of the body text + - and thus both easier to skip, and also (paradoxically) easier to + pick out so that I could follow them. + + Thus, for the moment (and as always susceptable to argument), I'd + say -1 on the new form of footnote reference (i.e., I much prefer + the existing ``[1]_`` over the proposed ``1_``), and ambivalent + over the proposed target change. + + That leaves David's problem of wanting to distinguish footnotes + and citations - and the only thing I can propose there is that + footnotes are numeric or # and citations are not (which, as a + human being, I can probably cope with!). + +From a reply by Paul Moore on 2002-03-01: + + I think the current footnote syntax ``[1]_`` is *exactly* the + right balance of distinctness vs unobtrusiveness. I very + definitely don't think this should change. + + On the target change, it doesn't matter much to me. + +From a further reply by Tony Ibbs on 2002-03-01, referring to the +"[1]" form and actual usage in email: + + Clearly this is a form people are used to, and thus we should + consider it strongly (in the same way that the usage of ``*..*`` + to mean emphasis was taken partly from email practise). + + Equally clearly, there is something "magical" for people in the + use of a similar form (i.e., ``[1]``) for both footnote reference + and footnote target - it seems natural to keep them similar. + + ... 
+ + I think that this established plaintext usage leads me to strongly + believe we should retain square brackets at both ends of a + footnote. The markup of the reference end (a single trailing + underscore) seems about as minimal as we can get away with. The + markup of the target end depends on how one envisages the thing - + if ".." means "I am a target" (as I tend to see it), then that's + good, but one can also argue that the "_[1]" syntax has a neat + symmetry with the footnote reference itself, if one wishes (in + which case ".." presumably means "hidden/special" as David seems + to think, which is why one needs a ".." *and* a leading underline + for hyperlink targets. + +Given the persuading arguments voiced, we'll leave footnote & footnote +reference syntax alone. Except that these discussions gave rise to +the "auto-symbol footnote" concept, which has been added. Citations +and citation references have also been added. + + +Syntax for Questions & Answers +============================== + +Implement as a generic two-column marked list? As a standalone +(non-directive) construct? (Is the markup ambiguous?) Add support to +parts.contents? + +New elements would be required. Perhaps:: + + <!ELEMENT question_list (question_list_item+)> + <!ATTLIST question_list + numbering (none | local | global) + #IMPLIED + start NUMBER #IMPLIED> + <!ELEMENT question_list_item (question, answer*)> + <!ELEMENT question %text.model;> + <!ELEMENT answer (%body.elements;)+> + +Originally I thought of implementing a Q&A list with special syntax:: + + Q: What am I? + + A: You are a question-and-answer + list. + + Q: What are you? + + A: I am the omniscient "we". + +Where each "Q" and "A" could also be numbered (e.g., "Q1"). However, +a simple enumerated or bulleted list will do just fine for syntax. A +directive could treat the list specially; e.g. 
the first paragraph +could be treated as a question, the remainder as the answer (multiple +answers could be represented by nested lists). Without special +syntax, this directive becomes low priority. + +As described in the FAQ__, no special syntax or directive is needed +for this application. + +__ https://docutils.sourceforge.io/FAQ.html + #how-can-i-mark-up-a-faq-or-other-list-of-questions-answers + + +-------- + Tabled +-------- + +Reworking Explicit Markup (Round 2) +=================================== + +See `Reworking Explicit Markup (Round 1)`_ for an earlier discussion. + +In April 2004, a new thread becan on docutils-develop: `Inconsistency +in RST markup`__. Several arguments were made; the first argument +begat later arguments. Below, the arguments are paraphrased "in +quotes", with responses. + +__ https://thread.gmane.org/gmane.text.docutils.devel/1386 + +1. References and targets take this form:: + + targetname_ + + .. _targetname: stuff + + But footnotes, "which generate links just like targets do", are + written as:: + + [1]_ + + .. [1] stuff + + "Footnotes should be written as":: + + [1]_ + + .. _[1]: stuff + + But they're not the same type of animal. That's not a "footnote + target", it's a *footnote*. Being a target is not a footnote's + primary purpose (an arguable point). It just happens to grow a + target automatically, for convenience. Just as a section title:: + + Title + ===== + + isn't a "title target", it's a *title*, which happens to grow a + target automatically. The consistency is there, it's just deeper + than at first glance. + + Also, ".. [1]" was chosen for footnote syntax because it closely + resembles one form of actual footnote rendering. ".. _[1]:" is too + verbose; excessive punctuation is required to get the job done. + + For more of the reasoning behind the syntax, see `Problems With + StructuredText (Hyperlinks) <problems.html#hyperlinks>`__ and + `Reworking Footnotes`_. + +2. "I expect directives to also look like ``.. 
this:`` [one colon] + because that also closely parallels the link and footnote target + markup." + + There are good reasons for the two-colon syntax: + + Two colons are used after the directive type for these reasons: + + - Two colons are distinctive, and unlikely to be used in common + text. + + - Two colons avoids clashes with common comment text like:: + + .. Danger: modify at your own risk! + + - If an implementation of reStructuredText does not recognize a + directive (i.e., the directive-handler is not installed), a + level-3 (error) system message is generated, and the entire + directive block (including the directive itself) will be + included as a literal block. Thus "::" is a natural choice. + + -- `restructuredtext.html#directives + <../../ref/rst/restructuredtext.html#directives>`__ + + The last reason is not particularly compelling; it's more of a + convenient coincidence or mnemonic. + +3. "Comments always seemed too easy. I almost never write comments. + I'd have no problem writing '.. comment:' in front of my comments. + In fact, it would probably be more readable, as comments *should* + be set off strongly, because they are very different from normal + text." + + Many people do use comments though, and some applications of + reStructuredText require it. For example, all reStructuredText + PEPs (and this document!) have an Emacs stanza at the bottom, in a + comment. Having to write ".. comment::" would be very obtrusive. + + Comments *should* be dirt-easy to do. It should be easy to + "comment out" a block of text. Comments in programming languages + and other markup languages are invariably easy. + + Any author is welcome to preface their comments with "Comment:" or + "Do Not Print" or "Note to Editor" or anything they like. A + "comment" directive could easily be implemented. It might be + confused with admonition directives, like "note" and "caution" + though. 
In unrelated (and unpublished and unfinished) work, adding + a "comment" directive as a true document element was considered:: + + If structure is necessary, we could use a "comment" directive + (to avoid nonsensical DTD changes, the "comment" directive + could produce an untitled topic element). + +4. "One of the goals of reStructuredText is to be *readable* by people + who don't know it. This construction violates that: it is not at + all obvious to the uninitiated that text marked by '..' is a + comment. On the other hand, '.. comment:' would be totally + transparent." + + Totally transparent, perhaps, but also very obtrusive. Another of + `reStructuredText's goals`_ is to be unobtrusive, and + ".. comment::" would violate that. The goals of reStructuredText + are many, and they conflict. Determining the right set of goals + and finding solutions that best fit is done on a case-by-case + basis. + + Even readability is has two aspects. Being readable without any + prior knowledge is one. Being as easily read in raw form as in + processed form is the other. ".." may not contribute to the former + aspect, but ".. comment::" would certainly detract from the latter. + + .. _author's note: + .. _reStructuredText's goals: ../../ref/rst/introduction.html#goals + +5. "Recently I sent someone an rst document, and they got confused; I + had to explain to them that '..' marks comments, *unless* it's a + directive, etc..." + + The explanation of directives *is* roundabout, defining comments in + terms of not being other things. That's definitely a wart. + +6. "Under the current system, a mistyped directive (with ':' instead + of '::') will be silently ignored. This is an error that could + easily go unnoticed." + + A parser option/setting like "--comments-on-stderr" would help. + +7. "I'd prefer to see double-dot-space / command / double-colon as the + standard Docutils markup-marker. It's unusual enough to avoid + being accidentally used. 
Everything that starts with a double-dot + should end with a double-colon." + + That would increase the punctuation verbosity of some constructs + considerably. + +8. Edward Loper proposed the following plan for backwards + compatibility: + + 1. ".. foo" will generate a deprecation warning to stderr, and + nothing in the output (no system messages). + 2. ".. foo: bar" will be treated as a directive foo. If there + is no foo directive, then do the normal error output. + 3. ".. foo:: bar" will generate a deprecation warning to + stderr, and be treated as a directive. Or leave it valid? + + So some existing documents might start printing deprecation + warnings, but the only existing documents that would *break* + would be ones that say something like:: + + .. warning: this should be a comment + + instead of:: + + .. warning:: this should be a comment + + Here, we're trading fairly common a silent error (directive + falsely treated as a comment) for a fairly uncommon explicitly + flagged error (comment falsely treated as directive). To make + things even easier, we could add a sentence to the + unknown-directive error. Something like "If you intended to + create a comment, please use '.. comment:' instead". + +On one hand, I understand and sympathize with the points raised. On +the other hand, I think the current syntax strikes the right balance +(but I acknowledge a possible lack of objectivity). On the gripping +hand, the comment and directive syntax has become well established, so +even if it's a wart, it may be a wart we have to live with. + +Making any of these changes would cause a lot of breakage or at least +deprecation warnings. I'm not sure the benefit is worth the cost. + +For now, we'll treat this as an unresolved legacy issue. + + +------- + To Do +------- + +Nested Inline Markup +==================== + +These are collected notes on a long-discussed issue. The original +mailing list messages should be referred to for details. 
+ +* In a 2001-10-31 discussion I wrote: + + Try, for example, `Ed Loper's 2001-03-21 post`_, which details + some rules for nested inline markup. I think the complexity is + prohibitive for the marginal benefit. (And if you can understand + that tree without going mad, you're a better man than I. ;-) + + Inline markup is already fragile. Allowing nested inline markup + would only be asking for trouble IMHO. If it proves absolutely + necessary, it can be added later. The rules for what can appear + inside what must be well thought out first though. + + .. _Ed Loper's 2001-03-21 post: + https://mail.python.org/pipermail/doc-sig/2001-March/001487.html + + -- https://mail.python.org/pipermail/doc-sig/2001-October/002354.html + +* In a 2001-11-09 Doc-SIG post, I wrote: + + The problem is that in the + what-you-see-is-more-or-less-what-you-get markup language that + is reStructuredText, the symbols used for inline markup ("*", + "**", "`", "``", etc.) may preclude nesting. + + I've rethought this position. Nested markup is not precluded, just + tricky. People and software parse "double and 'single' quotes" all + the time. Continuing, + + I've thought over how we might implement nested inline + markup. The first algorithm ("first identify the outer inline + markup as we do now, then recursively scan for nested inline + markup") won't work; counterexamples were given in my `last post + <https://mail.python.org/pipermail/doc-sig/2001-November/002363.html>`__. + + The second algorithm makes my head hurt:: + + while 1: + scan for start-string + if found: + push on stack + scan for start or end string + if new start string found: + recurse + elif matching end string found: + pop stack + elif non-matching end string found: + if its a markup error: + generate warning + elif the initial start-string was misinterpreted: + # e.g. in this case: ***strong** in emphasis* + restart with the other interpretation + # but it might be several layers back ... + ... 
+ + This is similar to how the parser does section title + recognition, but sections are much more regular and + deterministic. + + Bottom line is, I don't think the benefits are worth the effort, + even if it is possible. I'm not going to try to write the code, + at least not now. If somebody codes up a consistent, working, + general solution, I'll be happy to consider it. + + -- https://mail.python.org/pipermail/doc-sig/2001-November/002388.html + +* In a `2003-05-06 Docutils-Users post`__ Paul Tremblay proposed a new + syntax to allow for easier nesting. It eventually evolved into + this:: + + :role:[inline text] + + The duplication with the existing interpreted text syntax is + problematic though. + + __ https://article.gmane.org/gmane.text.docutils.user/317 + +* Could the parser be extended to parse nested interpreted text? :: + + :emphasis:`Some emphasized text with :strong:`some more + emphasized text` in it and **perhaps** :reference:`a link`` + +* In a `2003-06-18 Docutils-Develop post`__, Mark Nodine reported on + his implementation of a form of nested inline markup in his + Perl-based parser (unpublished). He brought up some interesting + ideas. The implementation was flawed, however, by the change in + semantics required for backslash escapes. + + __ https://article.gmane.org/gmane.text.docutils.devel/795 + +* Docutils-develop threads between David Abrahams, David Goodger, and + Mark Nodine (beginning 2004-01-16__ and 2004-01-19__) hashed out + many of the details of a potentially successful implementation, as + described below. David Abrahams checked in code to the "nesting" + branch of CVS, awaiting thorough review. + + __ https://thread.gmane.org/gmane.text.docutils.devel/1102 + __ https://thread.gmane.org/gmane.text.docutils.devel/1125 + +It may be possible to accomplish nested inline markup in general with +a more powerful inline markup parser. There may be some issues, but +I'm not averse to the idea of nested inline markup in general. 
I just +don't have the time or inclination to write a new parser now. Of +course, a good patch would be welcome! + +I envisage something like this. Explicit-role interpreted text must +be nestable. Prefix-based is probably preferred, since suffix-based +will look like inline literals:: + + ``text`:role1:`:role2: + +But it can be disambiguated, so it ought to be left up to the author:: + + `\ `text`:role1:`:role2: + +In addition, other forms of inline markup may be nested if +unambiguous:: + + *emphasized ``literal`` and |substitution ref| and link_* + +IOW, the parser ought to be as permissive as possible. + + +Index Entries & Indexes +======================= + +Were I writing a book with an index, I guess I'd need two +different kinds of index targets: inline/implicit and +out-of-line/explicit. For example:: + + In this `paragraph`:index:, several words are being + `marked`:index: inline as implicit `index`:index: + entries. + + .. index:: markup + .. index:: syntax + + The explicit index directives above would refer to + this paragraph. It might also make sense to allow multiple + entries in an ``index`` directive: + + .. index:: + markup + syntax + +The words "paragraph", "marked", and "index" would become index +entries pointing at the words in the first paragraph. The index +entry words appear verbatim in the text. (Don't worry about the +ugly ":index:" part; if indexing is the only/main application of +interpreted text in your documents, it can be implicit and +omitted.) The two directives provide manual indexing, where the +index entry words ("markup" and "syntax") do not appear in the +main text. We could combine the two directives into one:: + + .. index:: markup; syntax + +Semicolons instead of commas because commas could *be* part of the +index target, like:: + + .. index:: van Rossum, Guido + +Another reason for index directives is because other inline markup +wouldn't be possible within inline index targets. 
+ +Sometimes index entries have multiple levels. Given:: + + .. index:: statement syntax: expression statements + +In a hypothetical index, combined with other entries, it might +look like this:: + + statement syntax + expression statements ..... 56 + assignment ................ 57 + simple statements ......... 58 + compound statements ....... 60 + +Inline multi-level index targets could be done too. Perhaps +something like:: + + When dealing with `expression statements <statement syntax:>`, + we must remember ... + +The opposite sense could also be possible:: + + When dealing with `index entries <:multi-level>`, there are + many permutations to consider. + +Also "see / see also" index entries. + +Given:: + + Here's a paragraph. + + .. index:: paragraph + +(The "index" directive above actually targets the *preceding* +object.) The directive should produce something like this XML:: + + <paragraph> + <index_entry text="paragraph"/> + Here's a paragraph. + </paragraph> + +This kind of content model would also allow true inline +index-entries:: + + Here's a `paragraph`:index:. + +If the "index" role were the default for the application, it could be +dropped:: + + Here's a `paragraph`. + +Both of these would result in this XML:: + + <paragraph> + Here's a <index_entry>paragraph</index_entry>. + </paragraph> + + +from 2002-06-24 docutils-develop posts +-------------------------------------- + + If all of your index entries will appear verbatim in the text, + this should be sufficient. If not (e.g., if you want "Van Rossum, + Guido" in the index but "Guido van Rossum" in the text), we'll + have to figure out a supplemental mechanism, perhaps using + substitutions. + +I've thought a bit more on this, and I came up with two possibilities: + +1. Using interpreted text, embed the index entry text within the + interpreted text:: + + ... by `Guido van Rossum [Van Rossum, Guido]` ... + + The problem with this is obvious: the text becomes cluttered and + hard to read. 
The processed output would drop the text in + brackets, which goes against the spirit of interpreted text. + +2. Use substitutions:: + + ... by |Guido van Rossum| ... + + .. |Guido van Rossum| index:: Van Rossum, Guido + + A problem with this is that each substitution definition must have + a unique name. A subsequent ``.. |Guido van Rossum| index:: BDFL`` + would be illegal. Some kind of anonymous substitution definition + mechanism would be required, but I think that's going too far. + +Both of these alternatives are flawed. Any other ideas? + + +------------------- + ... Or Not To Do? +------------------- + +This is the realm of the possible but questionably probable. These +ideas are kept here as a record of what has been proposed, for +posterity and in case any of them prove to be useful. + + +Compound Enumerated Lists +========================= + +Allow for compound enumerators, such as "1.1." or "1.a." or "1(a)", to +allow for nested enumerated lists without indentation? + + +Indented Lists +============== + +Allow for variant styles by interpreting indented lists as if they +weren't indented? For example, currently the list below will be +parsed as a list within a block quote:: + + paragraph + + * list item 1 + * list item 2 + +But a lot of people seem to write that way, and HTML browsers make it +look as if that's the way it should be. The parser could check the +contents of block quotes, and if they contain only a single list, +remove the block quote wrapper. There would be two problems: + +1. What if we actually *do* want a list inside a block quote? + +2. What if such a list comes immediately after an indented construct, + such as a literal block? + +Both could be solved using empty comments (problem 2 already exists +for a block quote after a literal block). But that's a hack. + +Perhaps a runtime setting, allowing or disabling this convenience, +would be appropriate. 
But that raises issues too: + + User A, who writes lists indented (and their config file is set up + to allow it), sends a file to user B, who doesn't (and their + config file disables indented lists). The result of processing by + the two users will be different. + +It may seem minor, but it adds ambiguity to the parser, which is bad. + +See the `Doc-SIG discussion starting 2001-04-18`__ with Ed Loper's +"Structuring: a summary; and an attempt at EBNF", item 4 (and +follow-ups, here__ and here__). Also `docutils-users, 2003-02-17`__ +and `beginning 2003-08-04`__. + +__ https://mail.python.org/pipermail/doc-sig/2001-April/001776.html +__ https://mail.python.org/pipermail/doc-sig/2001-April/001789.html +__ https://mail.python.org/pipermail/doc-sig/2001-April/001793.html +__ https://sourceforge.net/mailarchive/message.php?msg_id=3838913 +__ https://sf.net/mailarchive/forum.php?thread_id=2957175&forum_id=11444 + + +Sloppy Indentation of List Items +================================ + +Perhaps the indentation shouldn't be so strict. Currently, this is +required:: + + 1. First line, + second line. + +Anything wrong with this? :: + + 1. First line, + second line. + +Problem? :: + + 1. First para. + + Block quote. (no good: requires some indent relative to first + para) + + Second Para. + + 2. Have to carefully define where the literal block ends:: + + Literal block + + Literal block? + +Hmm... Non-strict indentation isn't such a good idea. + + +Lazy Indentation of List Items +============================== + +Another approach: Going back to the first draft of reStructuredText +(2000-11-27 post to Doc-SIG):: + + - This is the fourth item of the main list (no blank line above). + The second line of this item is not indented relative to the + bullet, which precludes it from having a second paragraph. + +Change that to *require* a blank line above and below, to reduce +ambiguity. This "loosening" may be added later, once the parser's +been nailed down. 
However, a serious drawback of this approach is to +limit the content of each list item to a single paragraph. + + +David's Idea for Lazy Indentation +--------------------------------- + +Consider a paragraph in a word processor. It is a single logical line +of text which ends with a newline, soft-wrapped arbitrarily at the +right edge of the page or screen. We can think of a plaintext +paragraph in the same way, as a single logical line of text, ending +with two newlines (a blank line) instead of one, and which may contain +arbitrary line breaks (newlines) where it was accidentally +hard-wrapped by an application. We can compensate for the accidental +hard-wrapping by "unwrapping" every unindented second and subsequent +line. The indentation of the first line of a paragraph or list item +would determine the indentation for the entire element. Blank lines +would be required between list items when using lazy indentation. + +The following example shows the lazy indentation of multiple body +elements:: + + - This is the first paragraph + of the first list item. + + Here is the second paragraph + of the first list item. + + - This is the first paragraph + of the second list item. + + Here is the second paragraph + of the second list item. + +A more complex example shows the limitations of lazy indentation:: + + - This is the first paragraph + of the first list item. + + Next is a definition list item: + + Term + Definition. The indentation of the term is + required, as is the indentation of the definition's + first line. + + When the definition extends to more than + one line, lazy indentation may occur. (This is the second + paragraph of the definition.) + + - This is the first paragraph + of the second list item. + + - Here is the first paragraph of + the first item of a nested list. + + So this paragraph would be outside of the nested list, + but inside the second list item of the outer list. + + But this paragraph is not part of the list at all. 
+ +And the ambiguity remains:: + + - Look at the hyphen at the beginning of the next line + - is it a second list item marker, or a dash in the text? + + Similarly, we may want to refer to numbers inside enumerated + lists: + + 1. How many socks in a pair? There are + 2. How many pants in a pair? Exactly + 1. Go figure. + +Literal blocks and block quotes would still require consistent +indentation for all their lines. For block quotes, we might be able +to get away with only requiring that the first line of each contained +element be indented. For example:: + + Here's a paragraph. + + This is a paragraph inside a block quote. + Second and subsequent lines need not be indented at all. + + - A bullet list inside + the block quote. + + Second paragraph of the + bullet list inside the block quote. + +Although feasible, this form of lazy indentation has problems. The +document structure and hierarchy is not obvious from the indentation, +making the source plaintext difficult to read. This will also make +keeping track of the indentation while writing difficult and +error-prone. However, these problems may be acceptable for Wikis and +email mode, where we may be able to rely on less complex structure +(few nested lists, for example). + + +Multiple Roles in Interpreted Text +================================== + +In reStructuredText, inline markup cannot be nested (yet; `see +above`__). This also applies to interpreted text. In order to +simultaneously combine multiple roles for a single piece of text, a +syntax extension would be necessary. Ideas: + +1. Initial idea:: + + `interpreted text`:role1,role2: + +2. Suggested by Jason Diamond:: + + `interpreted text`:role1:role2: + +If a document is so complex as to require nested inline markup, +perhaps another markup system should be considered. By design, +reStructuredText does not have the flexibility of XML. 
+ +__ `Nested Inline Markup`_ + + +Parameterized Interpreted Text +============================== + +In some cases it may be expedient to pass parameters to interpreted +text, analogous to function calls. Ideas: + +1. Parameterize the interpreted text role itself (suggested by Jason + Diamond):: + + `interpreted text`:role1(foo=bar): + + Positional parameters could also be supported:: + + `CSS`:acronym(Cascading Style Sheets): is used for HTML, and + `CSS`:acronym(Content Scrambling System): is used for DVDs. + + Technical problem: current interpreted text syntax does not + recognize roles containing whitespace. Design problem: this smells + like programming language syntax, but reStructuredText is not a + programming language. + +2. Put the parameters inside the interpreted text:: + + `CSS (Cascading Style Sheets)`:acronym: is used for HTML, and + `CSS (Content Scrambling System)`:acronym: is used for DVDs. + + Although this could be defined on an individual basis (per role), + we ought to have a standard. Hyperlinks with embedded URIs already + use angle brackets; perhaps they could be used here too:: + + `CSS <Cascading Style Sheets>`:acronym: is used for HTML, and + `CSS <Content Scrambling System>`:acronym: is used for DVDs. + + Do angle brackets connote URLs too much for this to be acceptable? + How about the "tag" connotation -- does it save them or doom them? + +3. `Nested inline markup`_ could prove useful here:: + + `CSS :def:`Cascading Style Sheets``:acronym: is used for HTML, + and `CSS :def:`Content Scrambling System``:acronym: is used for + DVDs. + + Inline markup roles could even define the default roles of nested + inline markup, allowing this cleaner syntax:: + + `CSS `Cascading Style Sheets``:acronym: is used for HTML, and + `CSS `Content Scrambling System``:acronym: is used for DVDs. + +Does this push inline markup too far? Readability becomes a serious +issue. 
Substitutions may provide a better alternative (at the expense +of verbosity and duplication) by pulling the details out of the text +flow:: + + |CSS| is used for HTML, and |CSS-DVD| is used for DVDs. + + .. |CSS| acronym:: Cascading Style Sheets + .. |CSS-DVD| acronym:: Content Scrambling System + :text: CSS + +---------------------------------------------------------------------- + +This whole idea may be going beyond the scope of reStructuredText. +Documents requiring this functionality may be better off using XML or +another markup system. + +This argument comes up regularly when pushing the envelope of +reStructuredText syntax. I think it's a useful argument in that it +provides a check on creeping featurism. In many cases, the resulting +verbosity produces such unreadable plaintext that there's a natural +desire *not* to use it unless absolutely necessary. It's a matter of +finding the right balance. + + +Syntax for Interpreted Text Role Bindings +========================================= + +The following syntax (idea from Jeffrey C. Jacobs) could be used to +associate directives with roles:: + + .. :rewrite: class:: rewrite + + `She wore ribbons in her hair and it lay with streaks of + grey`:rewrite: + +The syntax is similar to that of substitution declarations, and the +directive/role association may resolve implementation issues. The +semantics, ramifications, and implementation details would need to be +worked out. + +The example above would implement the "rewrite" role as adding a +``class="rewrite"`` attribute to the interpreted text ("inline" +element). The stylesheet would then pick up on the "class" attribute +to do the actual formatting. + +The advantage of the new syntax would be flexibility. Uses other than +"class" may present themselves. The disadvantage is complexity: +having to implement new syntax for a relatively specialized operation, +and having new semantics in existing directives ("class::" would do +something different). 
+ +The `"role" directive`__ has been implemented. + +__ ../../ref/rst/directives.html#role + + +Character Processing +==================== + +Several people have suggested adding some form of character processing +to reStructuredText: + +* Some sort of automated replacement of ASCII sequences: + + - ``--`` to em-dash (or ``--`` to en-dash, and ``---`` to em-dash). + - Convert quotes to curly quote entities. (Essentially impossible + for HTML? Unnecessary for TeX.) + - Various forms of ``:-)`` to smiley icons. + - ``"\ "`` to &nbsp;. Problem with line-wrapping though: it could + end up escaping the newline. + - Escaped newlines to <BR>. + - Escaped period or quote or dash as a disappearing catalyst to + allow character-level inline markup? + +* XML-style character entities, such as "&copy;" for the copyright + symbol. + +Docutils has no need of a character entity subsystem. Supporting +Unicode and text encodings, character entities should be directly +represented in the text: a copyright symbol should be represented by +the copyright symbol character. If this is not possible in an +authoring environment, a pre-processing stage can be added, or a table +of substitution definitions can be devised. + +A "unicode" directive has been implemented to allow direct +specification of esoteric characters. In combination with the +substitution construct, "include" files defining common sets of +character entities can be defined and used. `A set of character +entity set definition files have been defined`__ (`tarball`__). +There's also `a description and instructions for use`__. + +__ https://docutils.sourceforge.io/tmp/charents/ +__ https://docutils.sourceforge.io/tmp/charents.tgz +__ https://docutils.sourceforge.io/tmp/charents/README.html + +To allow for `character-level inline markup`_, a limited form of +character processing has been added to the spec and parser: escaped +whitespace characters are removed from the processed document.
Any +further character processing will be of this functional type, rather +than of the character-encoding type. + +.. _character-level inline markup: + ../../ref/rst/restructuredtext.html#character-level-inline-markup + +* Directive idea:: + + .. text-replace:: "pattern" "replacement" + + - Support Unicode "U+XXXX" codes. + - Support regexps, perhaps with alternative "regexp-replace" + directive. + - Flags for regexps; ":flags:" option, or individuals. + - Specifically, should the default be case-sensitive or + -insensitive? + + +Page Or Line Breaks +=================== + +* Should ^L (or something else in reST) be defined to mean + force/suggest page breaks in whatever output we have? + + A "break" or "page-break" directive would be easy to add. A new + doctree element would be required though (perhaps "break"). The + final behavior would be up to the Writer. The directive argument + could be one of page/column/recto/verso for added flexibility. + + Currently ^L (Python's ``\f``) characters are treated as whitespace. + They're converted to single spaces, actually, as are vertical tabs + (^K, Python's ``\v``). It would be possible to recognize form feeds + as markup, but it requires some thought and discussion first. Are + there any downsides? Many editing environments do not allow the + insertion of control characters. Will it cause any harm? It would + be useful as a shorthand for the directive. + + It's common practice to use ^L before Emacs "Local Variables" + lists:: + + ^L + .. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: + + These are already present in many PEPs and Docutils project + documents. From the Emacs manual (info): + + A "local variables list" goes near the end of the file, in the + last page. (It is often best to put it on a page by itself.)
+ + It would be unfortunate if this construct caused a final blank page + to be generated (for those Writers that recognize the page breaks). + We'll have to add a transform that looks for a "break" plus zero or + more comments at the end of a document, and removes them. + + Probably a bad idea because there is no such thing as a page in a + generic document format. + +* Could the "break" concept above be extended to inline forms? + E.g. "^L" in the middle of a sentence could cause a line break. + Only recognize it at the end of a line (i.e., ``\f\n``)? + + Or is formfeed inappropriate? Perhaps vertical tab (``\v``), but + even that's a stretch. Can't use carriage returns, since they're + commonly used for line endings. + + Probably a bad idea as well because we do not want to use control + characters for well-readable and well-writable markup, and after all + we have the line block syntax for line breaks. + + +Superscript Markup +================== + +Add ``^superscript^`` inline markup? The only common non-markup uses +of "^" I can think of are as short hand for "superscript" itself and +for describing control characters ("^C to cancel"). The former +supports the proposed syntax, and it could be argued that the latter +ought to be literal text anyhow (e.g. "``^C`` to cancel"). + +However, superscripts are seldom needed, and new syntax would break +existing documents. When it's needed, the ``:superscript:`` +(``:sup:``) role can be used as well. + + +Code Execution +============== + +Add the following directives? + +- "exec": Execute Python code & insert the results. Call it + "python" to allow for other languages? + +- "system": Execute an ``os.system()`` call, and insert the results + (possibly as a literal block). Definitely dangerous! How to make + it safe? Perhaps such processing should be left outside of the + document, in the user's production system (a makefile or a script or + whatever).
Or, the directive could be disabled by default and only + enabled with an explicit command-line option or config file setting. + Even then, an interactive prompt may be useful, such as: + + The file.txt document you are processing contains a "system" + directive requesting that the ``sudo rm -rf /`` command be + executed. Allow it to execute? (y/N) + +- "eval": Evaluate an expression & insert the text. At parse + time or at substitution time? Dangerous? Perhaps limit to canned + macros; see text.date_. + + .. _text.date: ../todo.html#text-date + +It's too dangerous (or too complicated in the case of "eval"). We do +not want to have such things in the core. + + +``encoding`` Directive +====================== + +Add an "encoding" directive to specify the character encoding of the +input data? Not a good idea for the following reasons: + +- When it sees the directive, the parser will already have read the + input data, and encoding determination will already have been done. + +- If a file with an "encoding" directive is edited and saved with + a different encoding, the directive may cause data corruption. + + +Support for Annotations +======================= + +Add an "annotation" role, as the equivalent of the HTML "title" +attribute? This is secondary information that may "pop up" when the +pointer hovers over the main text. A corresponding directive would be +required to associate annotations with the original text (by name, or +positionally as in anonymous targets?). + +There have not been many requests for such a feature, though. Also, +cluttering WYSIWYG plaintext with annotations may not seem like a good +idea, and there is no "tool tip" in formats other than HTML. + + +``term`` Role +============= + +Add a "term" role for unfamiliar or specialized terminology? Probably +not; there is no real use case, and emphasis is enough for most cases. + + +Object references +================= + +We need syntax for `object references`_. + + - Parameterized substitutions?
For example:: + + See |figure (figure name)| on |page (figure name)|. + + .. |figure (name)| figure-ref:: (name) + .. |page (name)| page-ref:: (name) + + The result would be:: + + See figure 3.11 on page 157. + + But this would require substitution directives to be processed at + reference-time, not at definition-time as they are now. Or, + perhaps the directives could just leave ``pending`` elements + behind, and the transforms do the work? How to pass the data + through? Too complicated. Use interpreted text roles. + +.. _object references: + ../todo.html#object-numbering-and-object-references + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/problems.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/problems.txt new file mode 100644 index 00000000..f4477426 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/rst/problems.txt @@ -0,0 +1,871 @@ +============================== + Problems With StructuredText +============================== +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +There are several problems, unresolved issues, and areas of +controversy within StructuredText_ (Classic and Next Generation). In +order to resolve all these issues, this analysis brings all of the +issues out into the open, enumerates all the alternatives, and +proposes solutions to be incorporated into the reStructuredText_ +specification. + + +.. contents:: + + +Formal Specification +==================== + +The description in the original StructuredText.py has been criticized +for being vague. For practical purposes, "the code *is* the spec." 
+Tony Ibbs has been working on deducing a `detailed description`_ from +the documentation and code of StructuredTextNG_. Edward Loper's +STMinus_ is another attempt to formalize a spec. + +For this kind of a project, the specification should always precede +the code. Otherwise, the markup is a moving target which can never be +adopted as a standard. Of course, a specification may be revised +during the lifetime of the code, but without a spec there is no visible +control and thus no confidence. + + +Understanding and Extending the Code +==================================== + +The original StructuredText_ is a dense mass of sparsely commented +code and inscrutable regular expressions. It was not designed to be +extended and is very difficult to understand. StructuredTextNG_ has +been designed to allow input (syntax) and output extensions, but its +documentation (both internal [comments & docstrings], and external) is +inadequate for the complexity of the code itself. + +For reStructuredText to become truly useful, perhaps even part of +Python's standard library, it must have clear, understandable +documentation and implementation code. For the implementation of +reStructuredText to be taken seriously, it must be a sterling example +of the potential of docstrings; the implementation must practice what +the specification preaches. + + +Section Structure via Indentation +================================= + +Setext_ required that body text be indented by 2 spaces. The original +StructuredText_ and StructuredTextNG_ require that section structure +be indicated through indentation, as "inspired by Python". For +certain structures with a very limited, local extent (such as lists, +block quotes, and literal blocks), indentation naturally indicates +structure or hierarchy. For sections (which may have a very large +extent), structure via indentation is unnecessary, unnatural and +ambiguous.
Rather, the syntax of the section title *itself* should +indicate that it is a section title. + +The original StructuredText states that "A single-line paragraph whose +immediately succeeding paragraphs are lower level is treated as a +header." Requiring indentation in this way is: + +- Unnecessary. The vast majority of docstrings and standalone + documents will have no more than one level of section structure. + Requiring indentation for such docstrings is unnecessary and + irritating. + +- Unnatural. Most published works use title style (type size, face, + weight, and position) and/or section/subsection numbering rather + than indentation to indicate hierarchy. This is a tradition with a + very long history. + +- Ambiguous. A StructuredText header is indistinguishable from a + one-line paragraph followed by a block quote (precluding the use of + block quotes). Enumerated section titles are ambiguous (is it a + header? is it a list item?). Some additional adornment must be + required to confirm the line's role as a title, both to a parser and + to the human reader of the source text. + +Python's use of significant whitespace is a wonderful (if not +original) innovation, however requiring indentation in ordinary +written text is hypergeneralization. + +reStructuredText_ indicates section structure through title adornment +style (as exemplified by this document). This is far more natural. +In fact, it is already in widespread use in plain text documents, +including in Python's standard distribution (such as the toplevel +README_ file). + + +Character Escaping Mechanism +============================ + +No matter what characters are chosen for markup, some day someone will +want to write documentation *about* that markup or using markup +characters in a non-markup context. Therefore, any complete markup +language must have an escaping or encoding mechanism. For a +lightweight markup system, encoding mechanisms like SGML/XML's '&#42;' +are out.
So an escaping mechanism is in. However, with carefully +chosen markup, it should be necessary to use the escaping mechanism +only infrequently. + +reStructuredText_ needs an escaping mechanism: a way to treat +markup-significant characters as the characters themselves. Currently +there is no such mechanism (although ZWiki uses '!'). What are the +candidates? + +1. ``!`` + (http://www.zope.org/DevHome/Members/jim/StructuredTextWiki/NGEscaping) +2. ``\`` +3. ``~`` +4. doubling of characters + +The best choice for this is the backslash (``\``). It's "the single +most popular escaping character in the world!", therefore familiar and +unsurprising. Since characters only need to be escaped under special +circumstances, which are typically those explaining technical +programming issues, the use of the backslash is natural and +understandable. Python docstrings can be raw (prefixed with an 'r', +as in 'r""'), which would obviate the need for gratuitous doubling-up +of backslashes. + +(On 2001-03-29 on the Doc-SIG mailing list, GvR endorsed backslash +escapes, saying, "'nuff said. Backslash it is." Although neither +legally binding nor irrevocable nor any kind of guarantee of anything, +it is a good sign.) + +The rule would be: An unescaped backslash followed by any markup +character escapes the character. The escaped character represents the +character itself, and is prevented from playing a role in any markup +interpretation. The backslash is removed from the output. A literal +backslash is represented by an "escaped backslash," two backslashes in +a row. + +A carefully constructed set of recognition rules for inline markup +will obviate the need for backslash-escapes in almost all cases; see +`Delimitation of Inline Markup`_ below. + +When an expression (requiring backslashes and other characters used +for markup) becomes too complicated and therefore unreadable, a +literal block may be used instead. 
Inside literal blocks, no markup +is recognized, therefore backslashes (for the purpose of escaping +markup) become unnecessary. + +We could allow backslashes preceding non-markup characters to remain +in the output. This would make describing regular expressions and +other uses of backslashes easier. However, this would complicate the +markup rules and would be confusing. + + +Blank Lines in Lists +==================== + +Oft-requested in Doc-SIG (the earliest reference is dated 1996-08-13) +is the ability to write lists without requiring blank lines between +items. In docstrings, space is at a premium. Authors want to convey +their API or usage information in as compact a form as possible. +StructuredText_ requires blank lines between all body elements, +including list items, even when boundaries are obvious from the markup +itself. + +In reStructuredText, blank lines are optional between list items. +However, in order to eliminate ambiguity, a blank line is required +before the first list item and after the last. Nested lists also +require blank lines before the list start and after the list end. + + +Bullet List Markup +================== + +StructuredText_ includes 'o' as a bullet character. This is dangerous +and counter to the language-independent nature of the markup. There +are many languages in which 'o' is a word. For example, in Spanish:: + + Llamame a la casa + o al trabajo. + + (Call me at home or at work.) + +And in Japanese (when romanized):: + + Senshuu no doyoubi ni tegami + o kakimashita. + + ([I] wrote a letter on Saturday last week.) + +If a paragraph containing an 'o' word wraps such that the 'o' is the +first text on a line, or if a paragraph begins with such a word, it +could be misinterpreted as a bullet list. + +In reStructuredText_, 'o' is not used as a bullet character. '-', +'*', and '+' are the possible bullet characters. 
+ + +Enumerated List Markup +====================== + +StructuredText enumerated lists are allowed to begin with numbers and +letters followed by a period or right-parenthesis, then whitespace. +This has surprising consequences for writing styles. For example, +this is recognized as an enumerated list item by StructuredText:: + + Mr. Creosote. + +People will write enumerated lists in all different ways. It is folly +to try to come up with the "perfect" format for an enumerated list, +and limit the docstring parser's recognition to that one format only. + +Rather, the parser should recognize a variety of enumerator styles. +It is also recommended that the enumerator of the first list item be +ordinal-1 ('1', 'A', 'a', 'I', or 'i'), as output formats may not be +able to begin a list at an arbitrary enumeration. + +An initial idea was to require two or more consistent enumerated list +items in a row. This idea proved impractical and was dropped. In +practice, the presence of a proper enumerator is enough to reliably +recognize an enumerated list item; any ambiguities are reported by the +parser. Here's the original idea for posterity: + + The parser should recognize a variety of enumerator styles, mark + each block as a potential enumerated list item (PELI), and + interpret the enumerators of adjacent PELIs to decide whether they + make up a consistent enumerated list. + + If a PELI is labeled with a "1.", and is immediately followed by a + PELI labeled with a "2.", we've got an enumerated list. Or "(A)" + followed by "(B)". Or "i)" followed by "ii)", etc. The chances + of accidentally recognizing two adjacent and consistently labeled + PELIs, are acceptably small. 
+ + For an enumerated list to be recognized, the following must be + true: + + - the list must consist of multiple adjacent list items (2 or + more) + - the enumerators must all have the same format + - the enumerators must be sequential + + +Definition List Markup +====================== + +StructuredText uses ' -- ' (whitespace, two hyphens, whitespace) on +the first line of a paragraph to indicate a definition list item. The +' -- ' serves to separate the term (on the left) from the definition +(on the right). + +Many people use ' -- ' as an em-dash in their text, conflicting with +the StructuredText usage. Although the Chicago Manual of Style says +that spaces should not be used around an em-dash, Peter Funk pointed +out that this is standard usage in German (according to the Duden, the +official German reference), and possibly in other languages as well. +The widespread use of ' -- ' precludes its use for definition lists; +it would violate the "unsurprising" criterion. + +A simpler, and at least equally visually distinctive construct +(proposed by Guido van Rossum, who incidentally is a frequent user of +' -- ') would do just as well:: + + term 1 + Definition. + + term 2 + Definition 2, paragraph 1. + + Definition 2, paragraph 2. + +A reStructuredText definition list item consists of a term and a +definition. A term is a simple one-line paragraph. A definition is a +block indented relative to the term, and may contain multiple +paragraphs and other body elements. No blank line precedes a +definition (this distinguishes definition lists from block quotes). + + +Literal Blocks +============== + +The StructuredText_ specification has literal blocks indicated by +'example', 'examples', or '::' ending the preceding paragraph. STNG +only recognizes '::'; 'example'/'examples' are not implemented. This +is good; it fixes an unnecessary language dependency. The problem is +what to do with the sometimes- unwanted '::'. 
+ +In reStructuredText_ '::' at the end of a paragraph indicates that +subsequent *indented* blocks are treated as literal text. No further +markup interpretation is done within literal blocks (not even +backslash-escapes). If the '::' is preceded by whitespace, '::' is +omitted from the output; if '::' was the sole content of a paragraph, +the entire paragraph is removed (no 'empty' paragraph remains). If +'::' is preceded by a non-whitespace character, '::' is replaced by +':' (i.e., the extra colon is removed). + +Thus, a section could begin with a literal block as follows:: + + Section Title + ------------- + + :: + + print "this is example literal" + + +Tables +====== + +The table markup scheme in classic StructuredText was horrible. Its +omission from StructuredTextNG is welcome, and its markup will not be +repeated here. However, tables themselves are useful in +documentation. Alternatives: + +1. This format is the most natural and obvious. It was independently + invented (no great feat of creation!), and later found to be the + format supported by the `Emacs table mode`_:: + + +------------+------------+------------+--------------+ + | Header 1 | Header 2 | Header 3 | Header 4 | + +============+============+============+==============+ + | Column 1 | Column 2 | Column 3 & 4 span (Row 1) | + +------------+------------+------------+--------------+ + | Column 1 & 2 span | Column 3 | - Column 4 | + +------------+------------+------------+ - Row 2 & 3 | + | 1 | 2 | 3 | - span | + +------------+------------+------------+--------------+ + + Tables are described with a visual outline made up of the + characters '-', '=', '|', and '+': + + - The hyphen ('-') is used for horizontal lines (row separators). + - The equals sign ('=') is optionally used as a header separator + (as of version 1.5.24, this is not supported by the Emacs table + mode). + - The vertical bar ('|') is used for vertical lines (column + separators).
+ - The plus sign ('+') is used for intersections of horizontal and + vertical lines. + + Row and column spans are possible simply by omitting the column or + row separators, respectively. The header row separator must be + complete; in other words, a header cell may not span into the table + body. Each cell contains body elements, and may have multiple + paragraphs, lists, etc. Initial spaces for a left margin are + allowed; the first line of text in a cell determines its left + margin. + +2. Below is a simpler table structure. It may be better suited to + manual input than alternative #1, but there is no Emacs editing + mode available. One disadvantage is that it resembles section + titles; a one-column table would look exactly like section & + subsection titles. :: + + ============ ============ ============ ============== + Header 1 Header 2 Header 3 Header 4 + ============ ============ ============ ============== + Column 1 Column 2 Column 3 & 4 span (Row 1) + ------------ ------------ --------------------------- + Column 1 & 2 span Column 3 - Column 4 + ------------------------- ------------ - Row 2 & 3 + 1 2 3 - span + ============ ============ ============ ============== + + The table begins with a top border of equals signs with a space at + each column boundary (regardless of spans). Each row is + underlined. Internal row separators are underlines of '-', with + spaces at column boundaries. The last of the optional head rows is + underlined with '=', again with spaces at column boundaries. + Column spans have no spaces in their underline. Row spans simply + lack an underline at the row boundary. The bottom boundary of the + table consists of '=' underlines. A blank line is required + following a table. + +3. A minimalist alternative is as follows:: + + ==== ===== ======== ======== ======= ==== ===== ===== + Old State Input Action New State Notes + ----------- -------- ----------------- ----------- + ids types new type sys.msg. 
dupname ids types + ==== ===== ======== ======== ======= ==== ===== ===== + -- -- explicit -- -- new True + -- -- implicit -- -- new False + None False explicit -- -- new True + old False explicit implicit old new True + None True explicit explicit new None True + old True explicit explicit new,old None True [1] + None False implicit implicit new None False + old False implicit implicit new,old None False + None True implicit implicit new None True + old True implicit implicit new old True + ==== ===== ======== ======== ======= ==== ===== ===== + + The table begins with a top border of equals signs with one or more + spaces at each column boundary (regardless of spans). There must + be at least two columns in the table (to differentiate it from + section headers). Each line starts a new row. The rightmost + column is unbounded; text may continue past the edge of the table. + Each row/line must contain spaces at column boundaries, except for + explicit column spans. Underlines of '-' can be used to indicate + column spans, but should be used sparingly if at all. Lines + containing column span underlines may not contain any other text. + The last of the optional head rows is underlined with '=', again + with spaces at column boundaries. The bottom boundary of the table + consists of '=' underlines. A blank line is required following a + table. + + This table sums up the features. Using all the features in such a + small space is not pretty though:: + + ======== ======== ======== + Header 2 & 3 Span + ------------------ + Header 1 Header 2 Header 3 + ======== ======== ======== + Each line is a new row. + Each row consists of one line only. + Row spans are not possible. + The last column may spill over to the right. + Column spans are possible with an underline joining columns. + ---------------------------- + The span is limited to the row above the underline. + ======== ======== ======== + +4. 
As a variation of alternative 3, bullet list syntax in the first + column could be used to indicate row starts. Multi-line rows are + possible, but row spans are not. For example:: + + ===== ===== + col 1 col 2 + ===== ===== + - 1 Second column of row 1. + - 2 Second column of row 2. + Second line of paragraph. + - 3 Second column of row 3. + + Second paragraph of row 3, + column 2 + ===== ===== + + Column spans would be indicated on the line after the last line of + the row. To indicate a real bullet list within a first-column + cell, simply nest the bullets. + +5. In a further variation, we could simply assume that whitespace in + the first column implies a multi-line row; the text in other + columns is continuation text. For example:: + + ===== ===== + col 1 col 2 + ===== ===== + 1 Second column of row 1. + 2 Second column of row 2. + Second line of paragraph. + 3 Second column of row 3. + + Second paragraph of row 3, + column 2 + ===== ===== + + Limitations of this approach: + + - Cells in the first column are limited to one line of text. + + - Cells in the first column *must* contain some text; blank cells + would lead to a misinterpretation. An empty comment ("..") is + sufficient. + +6. Combining alternative 3 and 4, a bullet list in the first column + could mean multi-line rows, and no bullet list means single-line + rows only. + +Alternatives 1 and 5 have been adopted by reStructuredText. + + +Delimitation of Inline Markup +============================= + +StructuredText specifies that inline markup must begin with +whitespace, precluding such constructs as parenthesized or quoted +emphatic text:: + + "**What?**" she cried. (*exit stage left*) + +The `reStructuredText markup specification`_ allows for such +constructs and disambiguates inline markup through a set of +recognition rules.
These recognition rules define the context of +markup start-strings and end-strings, allowing markup characters to be +used in most non-markup contexts without a problem (or a backslash). +So we can say, "Use asterisks (*) around words or phrases to +*emphasize* them." The '(*)' will not be recognized as markup. This +reduces the need for markup escaping to the point where an escape +character is *almost* (but not quite!) unnecessary. + + +Underlining +=========== + +StructuredText uses '_text_' to indicate underlining. To quote David +Ascher in his 2000-01-21 Doc-SIG mailing list post, "Docstring +grammar: a very revised proposal": + + The tagging of underlined text with _'s is suboptimal. Underlines + shouldn't be used from a typographic perspective (underlines were + designed to be used in manuscripts to communicate to the + typesetter that the text should be italicized -- no well-typeset + book ever uses underlines), and conflict with double-underscored + Python variable names (__init__ and the like), which would get + truncated and underlined when that effect is not desired. Note + that while *complete* markup would prevent that truncation + ('__init__'), I think of docstring markups much like I think of + type annotations -- they should be optional and above all do no + harm. In this case the underline markup does harm. + +Underlining is not part of the reStructuredText specification. + + +Inline Literals +=============== + +StructuredText's markup for inline literals (text left as-is, +verbatim, usually in a monospaced font; as in HTML <TT>) is single +quotes ('literals'). The problem with single quotes is that they are +too often used for other purposes: + +- Apostrophes: "Don't blame me, 'cause it ain't mine, it's Chris'."; + +- Quoting text: + + First Bruce: "Well Bruce, I heard the prime minister use it. + 'S'hot enough to boil a monkey's bum in 'ere your Majesty,' he + said, and she smiled quietly to herself."
+ + In the UK, single quotes are used for dialogue in published works. + +- String literals: s = '' + +Alternatives:: + + 'text' \'text\' ''text'' "text" \"text\" ""text"" + #text# @text@ `text` ^text^ ``text'' ``text`` + +The examples below contain inline literals, quoted text, and +apostrophes. Each example should evaluate to the following HTML:: + + Some <TT>code</TT>, with a 'quote', "double", ain't it grand? + Does <TT>a[b] = 'c' + "d" + `2^3`</TT> work? + + 0. Some code, with a quote, double, ain't it grand? + Does a[b] = 'c' + "d" + `2^3` work? + 1. Some 'code', with a \'quote\', "double", ain\'t it grand? + Does 'a[b] = \'c\' + "d" + `2^3`' work? + 2. Some \'code\', with a 'quote', "double", ain't it grand? + Does \'a[b] = 'c' + "d" + `2^3`\' work? + 3. Some ''code'', with a 'quote', "double", ain't it grand? + Does ''a[b] = 'c' + "d" + `2^3`'' work? + 4. Some "code", with a 'quote', \"double\", ain't it grand? + Does "a[b] = 'c' + "d" + `2^3`" work? + 5. Some \"code\", with a 'quote', "double", ain't it grand? + Does \"a[b] = 'c' + "d" + `2^3`\" work? + 6. Some ""code"", with a 'quote', "double", ain't it grand? + Does ""a[b] = 'c' + "d" + `2^3`"" work? + 7. Some #code#, with a 'quote', "double", ain't it grand? + Does #a[b] = 'c' + "d" + `2^3`# work? + 8. Some @code@, with a 'quote', "double", ain't it grand? + Does @a[b] = 'c' + "d" + `2^3`@ work? + 9. Some `code`, with a 'quote', "double", ain't it grand? + Does `a[b] = 'c' + "d" + \`2^3\`` work? + 10. Some ^code^, with a 'quote', "double", ain't it grand? + Does ^a[b] = 'c' + "d" + `2\^3`^ work? + 11. Some ``code'', with a 'quote', "double", ain't it grand? + Does ``a[b] = 'c' + "d" + `2^3`'' work? + 12. Some ``code``, with a 'quote', "double", ain't it grand? + Does ``a[b] = 'c' + "d" + `2^3\``` work? + +Backquotes (#9 & #12) are the best choice. They are unobtrusive and +relatively rarely used (more rarely than ' or ", anyhow).
Backquotes +have the connotation of 'quotes', which other options (like carets, +#10) don't. + +Analogously with ``*emph*`` & ``**strong**``, double-backquotes (#12) +could be used for inline literals. If single-backquotes are used for +'interpreted text' (context-sensitive domain-specific descriptive +markup) such as function name hyperlinks in Python docstrings, then +double-backquotes could be used for absolute-literals, wherein no +processing whatsoever takes place. An advantage of double-backquotes +would be that backslash-escaping would no longer be necessary for +embedded single-backquotes; however, embedded double-backquotes (in an +end-string context) would be illegal. See `Backquotes in +Phrase-Links`__ in `Record of reStructuredText Syntax Alternatives`__. + +__ alternatives.html#backquotes-in-phrase-links +__ alternatives.html + +Alternative choices are carets (#10) and TeX-style quotes (#11). For +examples of TeX-style quoting, see +http://www.zope.org/Members/jim/StructuredTextWiki/CustomizingTheDocumentProcessor. + +Some existing uses of backquotes: + +1. As a synonym for repr() in Python. +2. For command-interpolation in shell scripts. +3. Used as open-quotes in TeX code (and carried over into plaintext + by TeXies). + +The inline markup start-string and end-string recognition rules +defined by the `reStructuredText markup specification`_ would allow +all of these cases inside inline literals, with very few exceptions. +As a fallback, literal blocks could handle all cases. + +Outside of inline literals, the above uses of backquotes would require +backslash-escaping. However, these are all prime examples of text +that should be marked up with inline literals. + +If either backquotes or straight single-quotes are used as markup, +TeX-quotes are too troublesome to support, so no special-casing of +TeX-quotes should be done (at least at first). If TeX-quotes have to +be used outside of literals, a single backslash-escape would suffice: +\``TeX quote''.
Ugly, true, but very infrequently used. + +Using literal blocks is a fallback option which removes the need for +backslash-escaping:: + + like this:: + + Here, we can do ``absolutely'' anything `'`'\|/|\ we like! + +No mechanism for inline literals is perfect, just as no escaping +mechanism is perfect. No matter what we use, complicated inline +expressions involving the inline literal quote and/or the backslash +will end up looking ugly. We can only choose the least often ugly +option. + +reStructuredText will use double backquotes for inline literals, and +single backquotes for interpreted text. + + +Hyperlinks +========== + +There are three forms of hyperlink currently in StructuredText_: + +1. (Absolute & relative URIs.) Text enclosed by double quotes + followed by a colon, a URI, and concluded by punctuation plus white + space, or just white space, is treated as a hyperlink:: + + "Python":http://www.python.org/ + +2. (Absolute URIs only.) Text enclosed by double quotes followed by a + comma, one or more spaces, an absolute URI and concluded by + punctuation plus white space, or just white space, is treated as a + hyperlink:: + + "mail me", mailto:me@mail.com + +3. (Endnotes.) Text enclosed by brackets link to an endnote at the + end of the document: at the beginning of the line, two dots, a + space, and the same text in brackets, followed by the end note + itself:: + + Please refer to the fine manual [GVR2001]. + + .. [GVR2001] Python Documentation, Release 2.1, van Rossum, + Drake, et al., http://www.python.org/doc/ + +The problem with forms 1 and 2 is that they are neither intuitive nor +unobtrusive (they break design goals 5 & 2). They overload +double-quotes, which are too often used in ordinary text (potentially +breaking design goal 4). The brackets in form 3 are also too common +in ordinary text (such as [nested] asides and Python lists like [12]). + +Alternatives: + +1. Have no special markup for hyperlinks. + +2. A.
Interpret and mark up hyperlinks as any contiguous text + containing '://' or ':...@' (absolute URI) or '@' (email + address) after an alphanumeric word. To de-emphasize the URI, + simply enclose it in parentheses: + + Python (http://www.python.org/) + + B. Leave special hyperlink markup as a domain-specific extension. + Hyperlinks in ordinary reStructuredText documents would be + required to be standalone (i.e. the URI text inline in the + document text). Processed hyperlinks (where the URI text is + hidden behind the link) are important enough to warrant syntax. + +3. The original Setext_ introduced a mechanism of indirect hyperlinks. + A source link word ('hot word') in the text was given a trailing + underscore:: + + Here is some text with a hyperlink_ built in. + + The hyperlink itself appeared at the end of the document on a line + by itself, beginning with two dots, a space, the link word with a + leading underscore, whitespace, and the URI itself:: + + .. _hyperlink http://www.123.xyz + + Setext used ``underscores_instead_of_spaces_`` for phrase links. + +With some modification, alternative 3 best satisfies the design goals. +It has the advantage of being readable and relatively unobtrusive. +Since each source link must match up to a target, the odd variable +ending in an underscore can be spared being marked up (although it +should generate a "no such link target" warning). The only +disadvantage is that phrase-links aren't possible without some +obtrusive syntax. + +We could achieve phrase-links if we enclose the link text: + +1. in double quotes:: + + "like this"_ + +2. in brackets:: + + [like this]_ + +3. or in backquotes:: + + `like this`_ + +Each gives us somewhat obtrusive markup, but that is unavoidable. The +bracketed syntax (#2) is reminiscent of links on many web pages +(intuitive), although it is somewhat obtrusive. 
Alternative #3 is +much less obtrusive, and is consistent with interpreted text: the +trailing underscore indicates the interpretation of the phrase, as a +hyperlink. #3 also disambiguates hyperlinks from footnote references. +Alternative #3 wins. + +The same trailing underscore markup can also be used for footnote and +citation references, removing the problem with ordinary bracketed text +and Python lists:: + + Please refer to the fine manual [GVR2000]_. + + .. [GVR2000] Python Documentation, van Rossum, Drake, et al., + http://www.python.org/doc/ + +The two-dots-and-a-space syntax was generalized by Setext for +comments, which are removed from the (visible) processed output. +reStructuredText uses this syntax for comments, footnotes, and link +targets, collectively termed "explicit markup". For link targets, in +order to eliminate ambiguity with comments and footnotes, +reStructuredText specifies that a colon always follow the link target +word/phrase. The colon denotes 'maps to'. There is no reason to +restrict target links to the end of the document; they could just as +easily be interspersed. + +Internal hyperlinks (links from one point to another within a single +document) can be expressed by a source link as before, and a target +link with a colon but no URI. In effect, these targets 'map to' the +element immediately following. + +As an added bonus, we now have a perfect candidate for +reStructuredText directives, a simple extension mechanism: explicit +markup containing a single word followed by two colons and whitespace. +The interpretation of subsequent data on the directive line or +following is directive-dependent. + +To summarize:: + + .. This is a comment. + + .. The line below is an example of a directive. + .. version:: 1 + + This is a footnote [1]_. + + This internal hyperlink will take us to the footnotes_ area below. + + Here is a one-word_ external hyperlink. + + Here is `a hyperlink phrase`_. + + .. _footnotes: + .. [1] Footnote text goes here.
+ + .. external hyperlink target mappings: + .. _one-word: http://www.123.xyz + .. _a hyperlink phrase: http://www.123.xyz + +The presence or absence of a colon after the target link +differentiates an indirect hyperlink from a footnote, respectively. A +footnote requires brackets. Backquotes around a target link word or +phrase are required if the phrase contains a colon, optional +otherwise. + +Below are examples using no markup, the two StructuredText hypertext +styles, and the reStructuredText hypertext style. Each example +contains an indirect link, a direct link, a footnote/endnote, and +bracketed text. In HTML, each example should evaluate to:: + + <P>A <A HREF="http://spam.org">URI</A>, see <A HREF="#eggs2000"> + [eggs2000]</A> (in Bacon [Publisher]). Also see + <A HREF="http://eggs.org">http://eggs.org</A>.</P> + + <P><A NAME="eggs2000">[eggs2000]</A> "Spam, Spam, Spam, Eggs, + Bacon, and Spam"</P> + +1. No markup:: + + A URI http://spam.org, see eggs2000 (in Bacon [Publisher]). + Also see http://eggs.org. + + eggs2000 "Spam, Spam, Spam, Eggs, Bacon, and Spam" + +2. StructuredText absolute/relative URI syntax + ("text":http://www.url.org):: + + A "URI":http://spam.org, see [eggs2000] (in Bacon [Publisher]). + Also see "http://eggs.org":http://eggs.org. + + .. [eggs2000] "Spam, Spam, Spam, Eggs, Bacon, and Spam" + + Note that StructuredText does not recognize standalone URIs, + forcing doubling up as shown in the second line of the example + above. + +3. StructuredText absolute-only URI syntax + ("text", mailto:you@your.com):: + + A "URI", http://spam.org, see [eggs2000] (in Bacon + [Publisher]). Also see "http://eggs.org", http://eggs.org. + + .. [eggs2000] "Spam, Spam, Spam, Eggs, Bacon, and Spam" + +4. reStructuredText syntax:: + + 4. A URI_, see [eggs2000]_ (in Bacon [Publisher]). + Also see http://eggs.org. + + .. _URI: http://spam.org + ..
[eggs2000] "Spam, Spam, Spam, Eggs, Bacon, and Spam" + +The bracketed text '[Publisher]' may be problematic with +StructuredText (syntax 2 & 3). + +reStructuredText's syntax (#4) is definitely the most readable. The +text is separated from the link URI and the footnote, resulting in +cleanly readable text. + +.. _StructuredText: https://zopestructuredtext.readthedocs.org/ +.. _Setext: https://docutils.sourceforge.io/mirror/setext.html +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _detailed description: + http://homepage.ntlworld.com/tibsnjoan/docutils/STNG-format.html +.. _STMinus: http://www.cis.upenn.edu/~edloper/pydoc/stminus.html +.. _StructuredTextNG: + http://www.zope.org/DevHome/Members/jim/StructuredTextWiki/StructuredTextNG +.. _README: http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/~checkout~/ + python/python/dist/src/README +.. _Emacs table mode: http://table.sourceforge.net/ +.. _reStructuredText Markup Specification: + ../../ref/rst/restructuredtext.html + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/runtime-settings-processing.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/runtime-settings-processing.txt new file mode 100644 index 00000000..6df15a92 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/runtime-settings-processing.txt @@ -0,0 +1,306 @@ +============================= + Runtime Settings Processing +============================= + +:Author: David Goodger, Günter Milde +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +:Abstract: A detailed description of Docutils' settings processing + framework. + +..
contents:: + + +The ``docutils/__init__.py``, ``docutils/core.py``, and +``docutils/frontend.py`` modules are described. +Following along with the actual code is recommended. + +See `Docutils Runtime Settings`_ for a high-level description of the core +API and `Docutils Configuration`_ for a description of the individual +settings. + +.. note:: + This document is informal. + It describes the state in Docutils 0.18.1. + Implementation details will change with the move to replace the + deprecated optparse_ module with argparse_. + + +Settings priority +================= + +Docutils overlays default and explicitly specified values from various +sources such that settings behave the way we want and expect them to +behave. + +The sources are overlaid in the following order (later sources +overwrite earlier ones): + +1. Defaults specified in `settings_spec`__ and + `settings_defaults`__ attributes for each component_. + (details__) + + __ ../api/runtime-settings.html#settingsspec-settings-spec + __ ../api/runtime-settings.html#settingsspec-settings-defaults + __ `OptionParser.populate_from_components()`_ + +2. Defaults specified in `settings_default_overrides`__ attribute for + each component_. + (details__) + + __ ../api/runtime-settings.html#settingsspec-settings-default-overrides + __ component.settings_default_overrides_ + +3. Settings specified in the `settings_overrides parameter`_ of the + `convenience functions`_ rsp. the `settings_overrides` attribute of + a `core.Publisher` instance. + (details__) + + __ OptionParser.defaults_ + +4. If enabled, settings specified in `active sections`_ of the + `configuration files`_ in the order described in + `Configuration File Sections & Entries`_. (details__) + + See also `Configuration File Sections & Entries`_. + + __ `OptionParser.get_standard_config_settings()`_ + +5. If enabled, command line arguments (details__).
+ + __ `Publisher.process_command_line()`_ + + +Settings assigned to the `settings parameter`_ of the +`convenience functions`_ or the ``Publisher.settings`` attribute +are used **instead of** the above sources +(see below for details for `command-line tools`__ and +`other applications`__). + +__ `publisher.publish()`_ +__ `publisher.process_programmatic_settings()`_ + +.. _command-line tools: + +Runtime settings processing for command-line tools +================================================== + +The command-line `front-end tools`_ usually import and call +the `convenience function`_ ``docutils.core.publish_cmdline()``. + +1. ``docutils.core.publish_cmdline()`` creates a `Publisher`_ instance:: + + publisher = core.Publisher(…, settings) + + eventually sets the components_ from the respective names, and calls :: + + publisher.publish(argv, …, settings_spec, + settings_overrides, config_section, …) + + .. _publisher.publish(): + +2. If `publisher.settings` is None, ``publisher.publish()`` calls:: + + publisher.process_command_line(…, + settings_spec, config_section, **defaults) + + with `defaults` taken from `publisher.settings_overrides`. + + If `publisher.settings` is defined, steps 3 to 5 are skipped. + +3. ``publisher.process_command_line()`` calls:: + + optpar = publisher.setup_option_parser(…, + settings_spec, config_section, **defaults) + + .. _publisher.setup_option_parser(): + +4. ``publisher.setup_option_parser()`` + + - merges the value of the `config_section parameter`_ into + `settings_spec` and + + - creates an `OptionParser` instance :: + + optpar = docutils.frontend.OptionParser(components, defaults) + + with `components` the tuple of the `SettingsSpec`_ instances + ``(publisher.parser, publisher.reader, publisher.writer, settings_spec)`` + + .. _OptionParser.populate_from_components(): + +5. 
The `OptionParser` instance prepends itself to the `components` tuple + and calls ``self.populate_from_components(components)``, which updates + the attribute ``self.defaults`` in two steps: + + a) For each component passed, ``component.settings_spec`` is processed + and ``component.settings_defaults`` is applied. + + .. _component.settings_default_overrides: + + b) In a second loop, for each component + ``component.settings_default_overrides`` is applied. This way, + ``component.settings_default_overrides`` can override the default + settings of any other component. + + .. _OptionParser.defaults: + +6. Back in ``OptionParser.__init__()``, ``self.defaults`` is updated with + the `defaults` argument passed to ``OptionParser(…)`` in step 5. + + This means that the `settings_overrides` argument of the + `convenience functions`_ has priority over all + ``SettingsSpec.settings_spec`` defaults. + + .. _OptionParser.get_standard_config_settings(): + +7. If configuration files are enabled, + ``self.get_standard_config_settings()`` is called. + + This reads the Docutils `configuration files`_, and returns a + dictionary of settings in `active sections`_ which is used to update + ``optpar.defaults``. So configuration file settings have priority over + all software-defined defaults. + + .. _Publisher.process_command_line(): + +8. ``publisher.process_command_line()`` calls ``optpar.parse_args()``. + The OptionParser parses all command line options and returns a + `docutils.frontend.Values` object. + This is assigned to ``publisher.settings``. + So command-line options have priority over configuration file + settings. + +9. The `<source>` and `<destination>` command-line arguments + are also parsed, and assigned to ``publisher.settings._source`` + and ``publisher.settings._destination`` respectively. + +10. ``publisher.publish()`` calls ``publisher.set_io()`` with no arguments. 
+ If either ``publisher.source`` or ``publisher.destination`` are not + set, the corresponding ``publisher.set_source()`` and + ``publisher.set_destination()`` are called: + + ``publisher.set_source()`` + checks for a ``source_path`` argument, and if there is none (which + is the case for command-line use), it is taken from + ``publisher.settings._source``. ``publisher.source`` is set by + instantiating a ``publisher.source_class`` object. + For command-line front-end tools, the default + ``publisher.source_class`` (i.e. ``docutils.io.FileInput``) + is used. + + ``publisher.set_destination()`` + does the same job for the destination. (the default + ``publisher.destination_class`` is ``docutils.io.FileOutput``). + + .. _accessing the runtime settings: + +11. ``publisher.publish()`` passes ``publisher.settings`` to the reader_ + component's ``read()`` method. + +12. The reader component creates a new `document root node`__. + ``nodes.document.__init__()`` adds the settings to the internal + attributes. + + __ ../ref/doctree.html#document + + All components acting on the Document Tree are handed the + ``document`` root node and can access the runtime settings as + ``document.settings``. + + +Runtime settings processing for other applications +================================================== + +The `convenience functions`_ , ``core.publish_file()``, +``core.publish_string()``, or ``core.publish_parts()`` do not parse the +command line for settings. + +1. The convenience functions call the generic programmatic interface + function ``core.publish_programmatically()`` that creates a + `core.Publisher` instance :: + + pub = core.Publisher(…, settings) + + eventually sets the components_ from the respective names, and calls :: + + publisher.process_programmatic_settings( + settings_spec, settings_overrides, config_section) + + .. _publisher.process_programmatic_settings(): + +2. 
If `publisher.settings` is None, + ``publisher.process_programmatic_settings()`` calls:: + + publisher.get_settings(settings_spec, config_section, **defaults) + + with `defaults` taken from `publisher.settings_overrides`. + + If `publisher.settings` is defined, the following steps are skipped. + +3. ``publisher.get_settings()`` calls:: + + optpar = publisher.setup_option_parser(…, + settings_spec, config_section, **defaults) + +4. The OptionParser instance determines setting defaults + as described in `steps 4 to 7`__ in the previous section. + + __ `publisher.setup_option_parser()`_ + +5. Back in ``publisher.get_settings()``, the ``frontend.Values`` instance + returned by ``optpar.get_default_values()`` is stored in + ``publisher.settings``. + +6. ``publish_programmatically()`` continues with setting + ``publisher.source`` and ``publisher.destination``. + +7. Finally, ``publisher.publish()`` is called. As ``publisher.settings`` + is not None, no further command line processing takes place. + +8. All components acting on the Document Tree are handed the + ``document`` root node and can access the runtime settings as + ``document.settings`` (cf. `steps 11 and 12`__ in the previous section). + + __ `accessing the runtime settings`_ + + +.. References: + +.. _optparse: https://docs.python.org/dev/library/optparse.html +.. _argparse: https://docs.python.org/dev/library/argparse.html + +.. _Docutils Runtime Settings: + ../api/runtime-settings.html +.. _active sections: + ../api/runtime-settings.html#active-sections +.. _SettingsSpec: + ../api/runtime-settings.html#settingsspec +.. _component: +.. _components: + ../api/runtime-settings.html#components +.. _application settings specifications: +.. _convenience function: +.. _convenience functions: + ../api/runtime-settings.html#convenience-functions +.. _settings_overrides parameter: + ../api/runtime-settings.html#settings-overrides-parameter +.. _settings parameter: + ../api/runtime-settings.html#settings-parameter +.. 
_config_section parameter: + ../api/runtime-settings.html#config-section-parameter + +.. _Publisher convenience functions: + ../api/publisher.html#publisher-convenience-functions +.. _front-end tools: ../user/tools.html +.. _configuration file: +.. _configuration files: +.. _Docutils Configuration: ../user/config.html#configuration-files +.. _Configuration File Sections & Entries: + ../user/config.html#configuration-file-sections-entries +.. _Docutils Project Model: ../peps/pep-0258.html#docutils-project-model +.. _Publisher: ../peps/pep-0258.html#publisher +.. _Reader: ../peps/pep-0258.html#reader diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/semantics.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/semantics.txt new file mode 100644 index 00000000..15dcadd3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/semantics.txt @@ -0,0 +1,119 @@ +===================== + Docstring Semantics +===================== +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +These are notes for a possible future PEP providing the final piece of +the Python docstring puzzle: docstring semantics or documentation +methodology. `PEP 257`_, Docstring Conventions, sketches out some +guidelines, but does not get into methodology details. + +I haven't explored documentation methodology more because, in my +opinion, it is a completely separate issue from syntax, and it's even +more controversial than syntax. Nobody wants to be told how to lay +out their documentation, a la JavaDoc_. I think the JavaDoc way is +butt-ugly, but it *is* an established standard for the Java world. +Any standard documentation methodology has to be formal enough to be +useful but remain light enough to be usable. If the methodology is +too strict, too heavy, or too ugly, many/most will not want to use it. 
+ +I think a standard methodology could benefit the Python community, but +it would be a hard sell. A PEP would be the place to start. For most +human-readable documentation needs, the free-form text approach is +adequate. We'd only need a formal methodology if we want to extract +the parameters into a data dictionary, index, or summary of some kind. + + +PythonDoc +========= + +(Not to be confused with Daniel Larsson's pythondoc_ project.) + +A Python version of the JavaDoc_ semantics (not syntax). A set of +conventions which are understood by the Docutils. What JavaDoc has +done is to establish a syntax that enables a certain documentation +methodology, or standard *semantics*. JavaDoc is not just syntax; it +prescribes a methodology. + +- Use field lists or definition lists for "tagged blocks". By this I + mean that field lists can be used similarly to JavaDoc's ``@tag`` + syntax. That's actually one of the motivators behind field lists. + For example, we could have:: + + """ + :Parameters: + - `lines`: a list of one-line strings without newlines. + - `until_blank`: Stop collecting at the first blank line if + true (1). + - `strip_indent`: Strip common leading indent if true (1, + default). + + :Return: + - a list of indented lines with minimum indent removed; + - the amount of the indent; + - whether or not the block finished with a blank line or at + the end of `lines`. + """ + + This is taken straight out of docutils/statemachine.py, in which I + experimented with a simple documentation methodology. Another + variation I've thought of exploits the Grouch_-compatible + "classifier" element of definition lists. For example:: + + :Parameters: + `lines` : [string] + List of one-line strings without newlines. + `until_blank` : boolean + Stop collecting at the first blank line if true (1). + `strip_indent` : boolean + Strip common leading indent if true (1, default). 
+ +- Field lists could even be used in a one-to-one correspondence with + JavaDoc ``@tags``, although I doubt if I'd recommend it. Several + ports of JavaDoc's ``@tag`` methodology exist in Python, most + recently Ed Loper's "epydoc_". + + +Other Ideas +=========== + +- Can we extract comments from parsed modules? Could be handy for + documenting function/method parameters:: + + def method(self, + source, # path of input file + dest # path of output file + ): + + This would save having to repeat parameter names in the docstring. + + Idea from Mark Hammond's 1998-06-23 Doc-SIG post, "Re: [Doc-SIG] + Documentation tool": + + it would be quite hard to add a new param to this method without + realising you should document it + +- Frederic Giacometti's `iPhrase Python documentation conventions`_ is + an attachment to his Doc-SIG post of 2001-05-30. + + +.. _PEP 257: https://peps.python.org/pep-0257 +.. _JavaDoc: http://java.sun.com/j2se/javadoc/ +.. _pythondoc: http://starship.python.net/crew/danilo/pythondoc/ +.. _Grouch: http://www.mems-exchange.org/software/grouch/ +.. _epydoc: http://epydoc.sourceforge.net/ +.. _iPhrase Python documentation conventions: + https://mail.python.org/pipermail/doc-sig/2001-May/001840.html + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/testing.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/testing.txt new file mode 100644 index 00000000..cee79116 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/testing.txt @@ -0,0 +1,301 @@ +=================== + Docutils_ Testing +=================== + +:Authors: Lea Wiemann <LeWiemann@gmail.com>; + David Goodger <goodger@python.org>; + Docutils developers <docutils-developers@lists.sourceforge.net> +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + +.. contents:: + +When adding new functionality (or fixing bugs), be sure to add test +cases to the test suite. Practise test-first programming; it's fun, +it's addictive, and it works! + +This document describes how to run the Docutils test suite, how the +tests are organized and how to add new tests or modify existing tests. + + +Running the Test Suite +====================== + +Before checking in any changes, run the entire Docutils test suite to +be sure that you haven't broken anything. From a shell do [#]_:: + + cd docutils/test + python -u alltests.py + +Before `checking in`__ changes to the Docutils core, run the tests on +all `supported Python versions`_ (see below for details). +In a pinch, the edge cases should cover most of it. + +__ policies.html#check-ins + +.. note:: + Due to incompatible customization of the standard unittest_ + framework, the test suite does not work with popular test frameworks + like pytest_ or nose_. + + .. _unittest: https://docs.python.org/3/library/unittest.html + .. _pytest: https://pypi.org/project/pytest/ + .. _nose: https://pypi.org/project/nose3/ + + .. cf. https://sourceforge.net/p/docutils/feature-requests/81/ + +.. 
[#] When using the `Python launcher for Windows`__, make sure to + specify a Python version, e.g., ``py -3.9 -u alltests.py`` for + Python 3.9. + + __ https://docs.python.org/3/using/windows.html#python-launcher-for-windows + + .. cf. https://sourceforge.net/p/docutils/bugs/434/ + + +.. _Python versions: + +Testing across multiple Python versions +--------------------------------------- + +A Docutils release has a commitment to support a minimum Python version [#]_ +and beyond. Before a release is cut, tests must pass in all +`supported versions`_. + +You can use `tox`_ to test with all supported versions in one go. +From the shell:: + + cd docutils + tox + +To test a specific version, use the ``pyNN`` environment. For example:: + + tox -e py37 + +`pyenv`_ can be installed and configured (see `installing pyenv`_) to +get multiple Python versions:: + + # assuming your system runs 3.9.x + pyenv install 3.7.12 + pyenv install 3.8.12 + pyenv install 3.10.1 + pyenv global system 3.7.12 3.8.12 3.10.1 + + # reset your shims + rm -rf ~/.pyenv/shims && pyenv rehash + +This will give you ``python3.7`` through ``python3.10``. +Then run:: + + python3.7 -u alltests.py + [...] + python3.10 -u alltests.py + +.. [#] Good resources covering the differences between Python versions + are the `What's New` documents (`What's New in Python 3.10`__ and + similar). + +__ https://docs.python.org/3/whatsnew/3.10.html + + +.. _supported versions: +.. _supported Python versions: ../../README.html#requirements +.. _pyenv: https://github.com/yyuu/pyenv +.. _installing pyenv: https://github.com/yyuu/pyenv#installation +.. _tox: https://pypi.org/project/tox/ + + +Unit Tests +========== + +Unit tests test single functions or modules (i.e. whitebox testing). + +If you are implementing a new feature, be sure to write a test case +covering its functionality. 
It happens very frequently that your +implementation (or even only a part of it) doesn't work with an older +(or even newer) Python version, and the only reliable way to detect +those cases is using tests. + +Often, it's easier to write the test first and then implement the +functionality required to make the test pass. + + +Writing New Tests +----------------- + +When writing new tests, it very often helps to see how a similar test +is implemented. For example, the files in the +``test_parsers/test_rst/`` directory all look very similar. So when +adding a test, you don't have to reinvent the wheel. + +If there is no similar test, you can write a new test from scratch +using Python's ``unittest`` module. For an example, please have a +look at the following imaginary ``test_square.py``:: + + #! /usr/bin/env python + + # $Id$ + # Author: Your Name <your_email_address@example.org> + # Copyright: This module has been placed in the public domain. + + """ + Test module for docutils.square. + """ + + import unittest + import docutils.square + + + class SquareTest(unittest.TestCase): + + def test_square(self): + self.assertEqual(docutils.square.square(0), 0) + self.assertEqual(docutils.square.square(5), 25) + self.assertEqual(docutils.square.square(7), 49) + + def test_square_root(self): + self.assertEqual(docutils.square.sqrt(49), 7) + self.assertEqual(docutils.square.sqrt(0), 0) + self.assertRaises(docutils.square.SquareRootError, + docutils.square.sqrt, 20) + + + if __name__ == '__main__': + unittest.main() + +For more details on how to write tests, please refer to the +documentation of the ``unittest`` module. + +.. Note:: + + Unit tests and functional tests should generally set :: + + settings_overrides['_disable_config'] = True + + in order to be independent of the user's local configuration. + +.. _functional: + +Functional Tests +================ + +The directory ``test/functional/`` contains data for functional tests.
+ +Performing functional testing means testing the Docutils system as a +whole (i.e. blackbox testing). + + +Directory Structure +------------------- + ++ ``functional/`` The main data directory. + + + ``input/`` The input files. + + - ``some_test.txt``, for example. + + + ``output/`` The actual output. + + - ``some_test.html``, for example. + + + ``expected/`` The expected output. + + - ``some_test.html``, for example. + + + ``tests/`` The config files for processing the input files. + + - ``some_test.py``, for example. + + - ``_default.py``, the `default configuration file`_. + + +The Testing Process +------------------- + +When running ``test_functional.py``, all config files in +``functional/tests/`` are processed. (Config files whose names begin +with an underscore are ignored.) The current working directory is +always Docutils' main test directory (``test/``). + +For example, ``functional/tests/some_test.py`` could read like this:: + + # Source and destination file names. + test_source = "some_test.txt" + test_destination = "some_test.html" + + # Keyword parameters passed to publish_file. + reader_name = "standalone" + parser_name = "rst" + writer_name = "html" + settings_overrides['output-encoding'] = 'utf-8' + # Relative to main ``test/`` directory. + settings_overrides['stylesheet_path'] = '../docutils/writers/html4css1/html4css1.css' + +The two variables ``test_source`` and ``test_destination`` contain the +input file name (relative to ``functional/input/``) and the output +file name (relative to ``functional/output/`` and +``functional/expected/``). Note that the file names can be chosen +arbitrarily. However, the file names in ``functional/output/`` *must* +match the file names in ``functional/expected/``. 
+ +If defined, ``_test_more`` must be a function with the following +signature:: + + def _test_more(expected_dir, output_dir, test_case, parameters): + +This function is called from the test case to perform tests beyond the +simple comparison of expected and actual output files. + +``test_source`` and ``test_destination`` are removed from the +namespace, as are all variables whose names begin with an underscore +("_"). The remaining names are passed as keyword arguments to +``docutils.core.publish_file``, so you can set reader, parser, writer +and anything else you want to configure. Note that +``settings_overrides`` is already initialized as a dictionary *before* +the execution of the config file. + + +Creating New Tests +------------------ + +In order to create a new test, put the input test file into +``functional/input/``. Then create a config file in +``functional/tests/`` which sets at least input and output file names, +reader, parser and writer. + +Now run ``test_functional.py``. The test will fail, of course, +because you do not have an expected output yet. However, an output +file will have been generated in ``functional/output/``. Check this +output file for validity [#]_ and correctness. Then copy the file to +``functional/expected/``. + +If you rerun ``test_functional.py`` now, it should pass. + +If you run ``test_functional.py`` later and the actual output doesn't +match the expected output anymore, the test will fail. + +If this is the case and you made an intentional change, check the +actual output for validity and correctness, copy it to +``functional/expected/`` (overwriting the old expected output), and +commit the change. + +.. [#] The validity of `Docutils XML` can be tested with + ``xmllint <document-referencing-local-Docutils-DTD>.xml --valid --noout``. + + .. note: the ``--dtdvalid`` and ``--nonet`` options did not help override + a reference to the PUBLIC "docutils.dtd" if there is a local version + on the system (e.g. 
/usr/share/xml/docutils/docutils.dtd in Debian). + + +.. _default configuration file: + +The Default Configuration File +------------------------------ + +The file ``functional/tests/_default.py`` contains default settings. +It is executed just before the actual configuration files, which has +the same effect as if the contents of ``_default.py`` were prepended +to every configuration file. diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/todo.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/todo.txt new file mode 100644 index 00000000..50c682f3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/todo.txt @@ -0,0 +1,2869 @@ +====================== + Docutils_ To Do List +====================== + +:Author: David Goodger (with input from many); open to all Docutils + developers +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + +.. contents:: + + +Priority items are marked with "@" symbols. The more @s, the higher +the priority. Items in question form (containing "?") are ideas which +require more thought and debate; they are potential to-do's. + +Many of these items are awaiting champions. If you see something +you'd like to tackle, please do! +Please see also the Bugs_ document for a list of bugs in Docutils. + +.. _bugs: ../../BUGS.html + + +Minimum Requirements for Python Standard Library Candidacy +========================================================== + +Below are action items that must be added and issues that must be +addressed before Docutils can be considered suitable to be proposed +for inclusion in the Python standard library. + +Many of these are now handled by Sphinx_ + +* Support for `document splitting`_. May require some major code + rework. + +* Support for subdocuments (see `large documents`_). 
+ +* `Object numbering and object references`_. + +* `Nested inline markup`_. + +* `Python Source Reader`_. + +* The HTML writer needs to be rewritten (or a second HTML writer + added) to allow for custom classes, and for arbitrary splitting + (stack-based?). + +* Documentation_ of the architecture. Other docs too. + +* Plugin support. + +* Suitability for `Python module documentation + <https://docutils.sourceforge.io/sandbox/README.html#documenting-python>`_. + +.. _Sphinx: http://www.sphinx-doc.org/ + +Repository +========== + +Move to a Git repository. + +* This is a long standing `feature request`__ + (with pointers to Sphinx issues and discussion). + + __ https://sourceforge.net/p/docutils/feature-requests/58/ + +* From a `post by David Goodger`__ + + An absolute requirement, for me, is that such a change be complete. + We can't lose any data or have to refer to the old system as an + "archive". So all the SVN history, all branches, and the full sandbox + need to be converted at the same time. + + __ https://sourceforge.net/p/docutils/mailman/message/31878077/ + +Convert with reposurgeon_? + + If you are doing a full import rather than gatewaying, reposurgeon is + probably what you want. It has been tested against a lot of large, old, + nasty repositories and is thus known to be robust in the presence of + repository malformations (a property regularly checked by a test suite + that is a rogue's gallery of Subversion botches). + + -- `Git Wiki`__ + +The comprehensive `Reposurgeon documentation`_ comes with +`a guide to repository conversion`__ +as well as info about `reading Subversion repositories`__. +Converting from an SVN dump file is faster than from a checkout. + +.. _reposurgeon: http://www.catb.org/esr/reposurgeon/ +.. 
_reposurgeon documentation: + http://www.catb.org/esr/reposurgeon/repository-editing.html +__ https://git.wiki.kernel.org/index.php/ + Interfaces,_frontends,_and_tools#Subversion +__ http://www.catb.org/esr/reposurgeon/repository-editing.html#conversion +__ http://www.catb.org/esr/reposurgeon/repository-editing.html + #_reading_subversion_repositories + + +Adam Turner wrote a conversion Makefile and ``.lift`` scripts that +download the repo from SF with rsync, convert it to an SVN mirror and +finally to Git, splitting sandbox, prest, and web from docutils. + + +Sourceforge supports multiple Git repositories per project, so we can +switch the version control system independent of the decision on an +eventual switch of the host. +Cf. https://sourceforge.net/p/forge/documentation/Git/ + + +General +======= + +Miscellaneous +------------- + +Code cleanup and modernization: + Use flake8_? See also the configuration in `<../../tox.ini>`__. + + Check and solve issue from :PEP:`290` - Code Migration and Modernization. + (Covers issues up to Python 2.4, is there an equivalent for more recent + modernizations?) + + Ensure `backwards compatibility`_! + + .. _flake8: https://pypi.org/project/flake8/ + .. _backwards compatibility: policies.html#backwards-compatibility-policy + +* Encoding of command line arguments can only be guessed: + + * try UTF-8/strict first, then try the locale's encoding with + strict error handling, then ASCII/replace? + + UTF-8 is almost 100% safe to try first; false positives are rare. + The locale's encoding with strict error handling may be a + reasonable compromise, but any error would indicate that the + locale's encoding is inappropriate. The only safe fallback is + ASCII/replace. + + * Do not decode argv before option parsing but individual string + values? + + +1 Allows for separate command-line vs. filesystem encodings, + respectively to keep file names encoded.
+ +1 Allows to configure command-line encoding in a config file, + -1 More complicated. + + Cf. <http://thread.gmane.org/gmane.text.docutils.user/2890/focus=2957>. + +* Improve handling on Windows: + + - Get graphical installer. + - Make rst2html.py an .exe file using py2exe. + +* .. _GUI: + + The user interface is very difficult to use for most Windows users; + you can't really expect them to use the command line. We need some + kind of GUI that can launch rst2html.py, and save the HTML output to + a file, and launch a browser. What's important is that we get + settings to work with the GUI. So we need some way to dynamically + generate a list of settings for the GUI. The current settings_spec + for OptionParser doesn't seem to be usable for this for the + following reasons: + + - It's biased toward the command line -- there are *two* options for + one boolean setting. + + - You cannot have both a one-line description and a longer + description for tooltips/help-texts. + + - It doesn't provide hints for the input type. You cannot easily + infer the type of a setting from its validator, because any + component can add new validators. In fact, it may be necessary to + have both a hint about the input type (e.g. string) and a + validator (valid ID), or it may be necessary to have a different + set of choices for the CLI (1, INFO, 2, ...) and for the GUI + (INFO, WARNING, ...). + + - It's coupled to the OptionParser. We want to be able to change + the underlying system without breaking everything. + + - It's a bunch of primitive structures. We want an extensible (thus + object-oriented) interface. + + So we probably need to create a class for storing all the settings, + and auto-generate the OptionParser data from that. + + I talked to Stephan Deibel about getting Docutils integrated into + Wing IDE. He said it's possible, and he'd be willing to help. + There's a scripting interface to Wing, which we'd use. 
We can + dynamically generate a list of preferences and not worry too much + about the rendering (from what I understood); Wing's whole GUI is + dynamic anyway. The interface could be made usable for other GUIs. + For example, we could try to get option support for DocFactory. // + FW + +* Allow different report levels for STDERR and system_messages inside + the document? + +* Change the docutils-update script (in sandbox/infrastructure), to + support arbitrary branch snapshots. + +* Move some general-interest sandboxes out of individuals' + directories, into subprojects? + +* Add option for file (and URL) access restriction to make Docutils + usable in Wikis and similar applications. + + 2005-03-21: added ``file_insertion_enabled`` & ``raw_enabled`` + settings. These partially solve the problem, allowing or disabling + **all** file accesses, but not limited access. + +* Configuration_ file handling needs discussion: + + - There should be some error checking on the contents of config + files. How much checking should be done? How loudly should + Docutils complain if it encounters an error/problem? + + - Docutils doesn't complain when it doesn't find a configuration + file supplied with the ``--config`` option. Should it? (If yes, + error or warning?) + +* Internationalization: + + - I18n needs refactoring, the language dictionaries are difficult to + maintain. Maybe have a look at gettext or similar tools. + + (This would make a nice Google Summer of Code project) + + - Language modules: in accented languages it may be useful to have + both accented and unaccented entries in the + ``bibliographic_fields`` mapping for versatility. + + - Add a "--strict-language" option & setting: no English fallback + for language-dependent features. + + Make this the default for output (as opposed to input)? + Throw an error with a helpful message, e.g. + + Default "contents" title for language %s missing, please specify + an explicit title. 
+ + or + + "attention" title for language %s missing, please use a generic + admonition with explicit title. + + - Add internationalization to _`footer boilerplate text` (resulting + from "--generator", "--source-link", and "--date" etc.), allowing + translations. + + +* Add validation? See http://pytrex.sourceforge.net, RELAX NG, pyRXP. + +* In ``docutils.readers.get_reader_class`` (& ``parsers`` & + ``writers`` too), should we be importing "standalone" or + "docutils.readers.standalone"? (This would avoid importing + top-level modules if the module name is not in docutils/readers. + Potential nastiness.) + +* Perhaps store a _`name-to-id mapping file`? This could be stored + permanently, read by subsequent processing runs, and updated with + new entries. ("Persistent ID mapping"?) + +* Perhaps the ``Component.supports`` method should deal with + individual features ("meta" etc.) instead of formats ("html" etc.)? + Currently, it is not used at all. + + Do we need it at all? Or rather let the writers just ignore some + nodes (like we already do for "class" values)? + + The current implementation of the framework also leads to bug + `bug #241`__ "doctree-based publishing != publish_string". + The "components.Filter" transform is run by publish_doctree(). When + filtering based on the output format, it should be run by + publish_from_doctree() instead because only then the writer is + known. + + So we need to either remove or fix the framework. + + __ https://sourceforge.net/p/docutils/bugs/241/ + + +* Think about _`large documents` made up of multiple subdocument + files. Issues: continuity (`persistent sequences`_ above), + cross-references (`name-to-id mapping file`_ above and `targets in + other documents`_ below), splitting (`document splitting`_ below). + + When writing a book, the author probably wants to split it up into + files, perhaps one per chapter (but perhaps even more detailed). 
+ However, we'd like to be able to have references from one chapter to + another, and have continuous numbering (pages and chapters, as + applicable). Of course, none of this is implemented yet. There has + been some thought put into some aspects; see `the "include" + directive`__ and the `Reference Merging`_ transform below. + + When I was working with SGML in Japan, we had a system where there + was a top-level coordinating file, book.sgml, which contained the + top-level structure of a book: the <book> element, containing the + book <title> and empty component elements (<preface>, <chapter>, + <appendix>, etc.), each with filename attributes pointing to the + actual source for the component. Something like this:: + + <book id="bk01"> + <title>Title of the Book</title> + <preface inrefid="pr01"></preface> + <chapter inrefid="ch01"></chapter> + <chapter inrefid="ch02"></chapter> + <chapter inrefid="ch03"></chapter> + <appendix inrefid="ap01"></appendix> + </book> + + (The "inrefid" attribute stood for "insertion reference ID".) + + The processing system would process each component separately, but + it would recognize and use the book file to coordinate chapter and + page numbering, and keep a persistent ID to (title, page number) + mapping database for cross-references. Docutils could use a similar + system for large-scale, multipart documents. + + __ ../ref/rst/directives.html#including-an-external-document-fragment + + Aahz's idea: + + First the ToC:: + + .. ToC-list:: + Introduction.txt + Objects.txt + Data.txt + Control.txt + + Then a sample use:: + + .. include:: ToC.txt + + As I said earlier in chapter :chapter:`Objects.txt`, the + reference count gets increased every time a binding is made. + + Which produces:: + + As I said earlier in chapter 2, the + reference count gets increased every time a binding is made. + + The ToC in this form doesn't even need to be references to actual + reST documents; I'm simply doing it that way for a minimum of + future-proofing, in case I do want to add the ability to pick up + references within external chapters.
+ + Perhaps, instead of ToC (which would overload the "contents" + directive concept already in use), we could use "manifest". A + "manifest" directive might associate local reference names with + files:: + + .. manifest:: + intro: Introduction.txt + objects: Objects.txt + data: Data.txt + control: Control.txt + + Then the sample becomes:: + + .. include:: manifest.txt + + As I said earlier in chapter :chapter:`objects`, the + reference count gets increased every time a binding is made. + +* Add support for _`multiple output files` and _`generic data + handling`: + + It should be possible for a component to **emit or reference** data + to be either **included or referenced** in the output document. + Examples of such data are stylesheets or images. + + For this, we need a "data" object which stores the data either + inline or by referring to a file. The Docutils framework is + responsible for either: + + * storing the data in the appropriate location (e.g. in the + directory of the output file, or in a user-specified directory) + and providing the paths of the stored files to the writer, *or* + + * providing the data itself to the writer so that it can be embedded + in the output document. + + This approach decouples data handling from the data source (which + can either be embedded or referenced) and the destination (which can + either be embedded or referenced as well). + + See . + +* Add testing for Docutils' front end tools? + +* Publisher: "Ordinary setup" shouldn't require specific ordering; at + the very least, there ought to be error checking higher up in the + call chain. [Aahz] + + ``Publisher.get_settings`` requires that all components be set up + before it's called. Perhaps the I/O *objects* shouldn't be set, but + I/O *classes*. Then options are set up (``.set_options``), and + ``Publisher.set_io`` (or equivalent code) is called with source & + destination paths, creating the I/O objects. 
+ + Perhaps I/O objects shouldn't be instantiated until required. For + split output, the Writer may be called multiple times, once for each + doctree, and each doctree should have a separate Output object (with + a different path). Is the "Builder" pattern applicable here? + +* Perhaps I/O objects should become full-fledged components (i.e. + subclasses of ``docutils.Component``, as are Readers, Parsers, and + Writers now), and thus have associated option/setting specs and + transforms. + +* Multiple file I/O suggestion from Michael Hudson: use a file-like + object or something you can iterate over to get file-like objects. + +* Add an "--input-language" option & setting? Specify a different + language module for input (bibliographic fields, directives) than + for output. The "--language" option would set both input & output + languages. + +* Auto-generate reference tables for language-dependent features? + Could be generated from the source modules. A special command-line + option could be added to Docutils front ends to do this. (Idea from + Engelbert Gruber.) + +* Enable feedback of some kind from internal decisions, such as + reporting the successful input encoding. Modify runtime settings? + System message? Simple stderr output? + +* Rationalize Writer settings (HTML/LaTeX/PEP) -- share settings. + +* Add an "--include file" command-line option (config setting too?), + equivalent to ".. include:: file" as the first line of the doc text? + Especially useful for character entity sets, text transform specs, + boilerplate, etc. + +* Support "include" as embedded inline-compatible directive in substitution + definitions, e.g. :: + + .. |version| include:: version.txt + + This document describes version |version| of ... + + (cf. Grzegorz Adam Hankiewicz's post from 2014-10-01 in docutils-devel) + +* Add an ``:optional: <replacement text>`` option to the "include" + directive? This would not throw an error for a missing file, instead a + warning is given and ``<replacement text>`` is used instead.
It would be + the responsibility of the author to ensure the missing file does not lead + to problems later in the document. + + Use cases: + + + Standard rST syntax to replace Sphinx's "literalinclude":: + + .. include:: blah.cpp + :literal: + :optional: file ``blah.cpp`` not found + + + Variable content taken from a file, e.g. + + version.txt:: + + .. |version| replace:: 3.1 + + optionally used as:: + + .. include:: version.txt + :optional: .. |version| replace:: unknown + + This document describes version |version| of ... + + (cf. Grzegorz Adam Hankiewicz's post from 2014-10-01 in docutils-devel) + +* Parameterize the Reporter object or class? See the `2004-02-18 + "rest checking and source path"`_ thread. + + .. _2004-02-18 "rest checking and source path": + http://thread.gmane.org/gmane.text.docutils.user/1112 + +* Add a "disable_transforms" setting? Would allow for easy syntax + checking. Where ("null" writer, generic, parser(s))? + Cf. the `2004-02-18 "rest checking and source path"`_ thread. + +* Add a generic meta-stylesheet mechanism? An external file could + associate style names ("class" attributes) with specific elements. + Could be generalized to arbitrary output attributes; useful for HTML + & XMLs. Aahz implemented something like this in + sandbox/aahz/Effective/EffMap.py. + +* .. _classes for table cells: + + William Dode suggested that table cells be assigned "class" + attributes by columns, so that stylesheets can affect text + alignment. Unfortunately, there doesn't seem to be a way (in HTML + at least) to leverage the "colspec" elements (HTML "col" tags) by + adding classes to them. The resulting HTML is very verbose:: + + <td class="col1">111</td> + <td class="col2">222</td> + ... + + At the very least, it should be an option. People who don't use it + shouldn't be penalized by increases in their HTML file sizes. + + Table rows could also be assigned classes (like odd/even). That + would be easier to implement. + + How should it be implemented?
+ + * There could be writer options (column classes & row classes) with + standard values. + + * The table directive could grow some options. Something like + ":cell-classes: col1 col2 col3" (either must match the number of + columns, or repeat to fill?) and ":row-classes: odd even" (repeat + to fill; body rows only, or header rows too?). + + Probably per-table directive options are best. The "class" values + could be used by any writer, and applying such classes to all tables + in a document with writer options is too broad. + + See also the `table_styling Sphinx extension`_ which defines + + :widths: also in Docutils core (but different implementation) + :column-alignment: Sets per-column text alignment + :column-wrapping: Sets per-column text wrapping + :column-dividers: Add dividers between columns + :column-classes: Add per-column css classes. + :header-columns: Specify number of “stub” columns + + .. _table_styling Sphinx extension: https://pythonhosted.org/cloud_sptheme/ + lib/cloud_sptheme.ext.table_styling.html + +* Add file-specific settings support to config files, like:: + + [file index.txt] + compact-lists: no + + Is this even possible? Should the criterion be the name of the + input file or the output file? Alternative (more explicit) syntax:: + + [source_file index.txt] + ... + + [dest_file index.html] + ... + + Or rather allow settings configuration from the rst source file + (see misc.settings_ directive)? + +* The "validator" support added to OptionParser is very similar to + "traits_" in SciPy_. Perhaps something could be done with them? + (Had I known about traits when I was implementing docutils.frontend, + I may have used them instead of rolling my own.) + + .. _traits: http://code.enthought.com/traits/ + .. _SciPy: http://www.scipy.org/ + +* tools/buildhtml.py: Extend the --prune option ("prune" config + setting) to accept file names (generic path) in addition to + directories (e.g. 
--prune=docs/user/rst/cheatsheet.txt, which should + *not* be converted to HTML). + +* Add support for _`plugins`. + +* _`Config directories`: Currently, ~/.docutils, ./docutils.conf/, & + /etc/docutils.conf are read as configuration_ files. Proposal: allow + ~/.docutils to be a configuration *directory*, along with + /etc/docutils/ and ./docutils.conf/. Within these directories, + check for config.txt files. We can also have subdirectories here, + for plugins, S5 themes, components (readers/writers/parsers) etc. + + Docutils will continue to support configuration files for backwards + compatibility. + +* Add support for document decorations other than headers & footers? + For example, top/bottom/side navigation bars for web pages. Generic + decorations? + + Seems like a bad idea as long as it isn't independent from the output + format (for example, navigation bars are only useful for web pages). + +* docutils_update: Check for a ``Makefile`` in a directory, and run + ``make`` if found? This would allow for variant processing on + specific source files, such as running rst2s5.py instead of + rst2html.py. + +* Add a "disable table of contents" setting? The S5 writer could set + it as a default. Rationale: + + The ``contents`` (table of contents) directive must not be used + [in S5/HTML documents]. It changes the CSS class of headings + and they won't show up correctly in the screen presentation. + + -- `Easy Slide Shows With reStructuredText & S5 + <../user/slide-shows.html>`_ + + Analogous to the ``sectnum_xform`` setting, it could be used by the + latex writer to switch to a LaTeX generated ToC (currently, the latex + writer calls it "use_latex_toc"). + +object numbering and object references +-------------------------------------- + +For equations, tables & figures. + +These would be the equivalent of DocBook's "formal" elements.
+ +In LaTeX, automatic counters are implemented for sections, equations and +floats (figures, tables) (configurable via stylesheets or in the +latex-preamble). Objects can be given `reference names`_ with the +``\label{<refname>}`` command, ``\ref{<refname>}`` inserts the +corresponding number. + +No such mechanism exists in HTML. + +* We need _`persistent sequences`, similar to chapter and footnote + numbers. See `OpenOffice.org XML`_ "fields". + + - Should the sequences be automatic or manual (user-specifiable)? + +* It is already possible to give `reference names`_ to objects via + internal hyperlink targets or the "name" directive option:: + + .. _figure name: + + .. figure:: image.png + + or :: + + .. figure:: image.png + :name: figure name + + Improve the mapping of "phrase references" to IDs/labels with Literal + transcription (i.e. ü -> ue, ß -> ss, å -> aa) instead of just + stripping the accents and other non-ASCII chars. See also the feature + request `allow more characters when transforming "names" to "ids"`__. + + A "table" directive has been implemented, supporting table titles. + + Perhaps the name could derive from the title/caption? + + .. _reference names: ../ref/rst/restructuredtext.html#reference-names + __ https://sourceforge.net/p/docutils/feature-requests/66/ + +* We need syntax for object references. Cf. `OpenOffice.org XML`_ + "reference fields": + + - Parameterized substitutions are too complicated + (cf. `or not to do`: `object references`_) + + - An interpreted text approach is simpler and better:: + + See Figure :ref:`figure name` and Equation :ref:`eq:identity`. + + - "equation", "figure", and "page" roles could generate appropriate + boilerplate text:: + + See :figure:`figure name` on :page:`figure name`. + + See `Interpreted Text`_ below. + + Reference boilerplate could be specified in the document + (defaulting to nothing):: + + .. fignum:: + :prefix-ref: "Figure " + :prefix-caption: "Fig.
" + :suffix-caption: : + + The position of the role (prefix or suffix) could also be utilized + + .. _OpenOffice.org XML: http://xml.openoffice.org/ + .. _object references: rst/alternatives.html#object-references + +See also the `Modified rst2html +`__ +by Nicolas Rougier for a sample implementation. + + +Documentation +============= + +User Docs +--------- + +* Add a FAQ entry about using Docutils (with reStructuredText) on a + server and that it's terribly slow. See the first paragraphs in + . + +* Add document about what Docutils has previously been used for + (web/use-cases.txt?). + +* Improve index in docs/user/config.txt. + + +Developer Docs +-------------- + +* Improve the internal module documentation (docstrings in the code). + Specific deficiencies listed below. + + - docutils.parsers.rst.states.State.build_table: data structure + required (including StringList). + + - docutils.parsers.rst.states: more complete documentation of parser + internals. + +* docs/ref/doctree.txt: DTD element structural relationships, + semantics, and attributes. In progress; element descriptions to be + completed. + +* Document the ``pending`` elements, how they're generated and what + they do. + +* Document the transforms_ (perhaps in docstrings?): how they're used, + what they do, dependencies & order considerations. + +* Document the HTML classes used by html4css1.py. + +* Write an overview of the Docutils architecture, as an introduction + for developers. What connects to what, why, and how. Either update + PEP 258 (see PEPs_ below) or as a separate doc. + +* Give information about unit tests. Maybe as a howto? + +* Document the docutils.nodes APIs. + +* Complete the docs/api/publisher.txt docs. + + +How-Tos +------- + +* Creating Docutils Writers + +* Creating Docutils Readers + +* Creating Docutils Transforms_ + +* Creating Docutils Parsers + +* Using Docutils as a Library + + +PEPs +---- + +* Complete PEP 258 Docutils Design Specification. 
+ + - Fill in the blanks in API details. + + - Specify the nodes.py internal data structure implementation? + + [Tibs:] Eventually we need to have direct documentation in + there on how it all hangs together - the DTD is not enough + (indeed, is it still meant to be correct? [Yes, it is. + --DG]). + +* Rework PEP 257, separating style from spec from tools, wrt Docutils? + See Doc-SIG from 2001-06-19/20. + + +Python Source Reader +==================== + +General: + +* Analyze Tony Ibbs' PySource code. + +* Analyze Doug Hellmann's HappyDoc project. + +* Investigate how POD handles literate programming. + +* Take the best ideas and integrate them into Docutils. + +Miscellaneous ideas: + +* Ask Python-dev for opinions (GvR for a pronouncement) on special + variables (__author__, __version__, etc.): convenience vs. namespace + pollution. Ask opinions on whether or not Docutils should recognize + & use them. + +* If we can detect that a comment block begins with ``##``, a la + JavaDoc, it might be useful to indicate interspersed section headers + & explanatory text in a module. For example:: + + """Module docstring.""" + + ## + # Constants + # ========= + + a = 1 + b = 2 + + ## + # Exception Classes + # ================= + + class MyException(Exception): pass + + # etc. + +* Should standalone strings also become (module/class) docstrings? + Under what conditions? We want to prevent arbitrary strings from + becoming docstrings of prior attribute assignments etc. Assume + that there must be no blank lines between attributes and attribute + docstrings? (Use lineno of NEWLINE token.) + + Triple-quotes are sometimes used for multi-line comments (such as + commenting out blocks of code). How to reconcile? + +* HappyDoc's idea of using comment blocks when there's no docstring + may be useful to get around the conflict between `additional + docstrings`_ and ``from __future__ import`` for module docstrings. 
+ A module could begin like this:: + + #!/usr/bin/env python + # :Author: Me + # :Copyright: whatever + + """This is the public module docstring (``__doc__``).""" + + # More docs, in comments. + # All comments at the beginning of a module could be + # accumulated as docstrings. + # We can't have another docstring here, because of the + # ``__future__`` statement. + + from __future__ import division + + Using the JavaDoc convention of a doc-comment block beginning with + ``##`` is useful though. It allows doc-comments and implementation + comments. + + .. _additional docstrings: + ../peps/pep-0258.html#additional-docstrings + +* HappyDoc uses an initial comment block to set "parser configuration + values". Do the same thing for Docutils, to set runtime settings on + a per-module basis? I.e.:: + + # Docutils:setting=value + + Could be used to turn on/off function parameter comment recognition + & other marginal features. Could be used as a general mechanism to + augment config files and command-line options (but which takes + precedence?). + +* Multi-file output should be divisible at arbitrary level. + +* Support all forms of ``import`` statements: + + - ``import module``: listed as "module" + - ``import module as alias``: "alias (module)" + - ``from module import identifier``: "identifier (from module)" + - ``from module import identifier as alias``: "alias (identifier + from module)" + - ``from module import *``: "all identifiers (``*``) from module" + +* Have links to colorized Python source files from API docs? And + vice-versa: backlinks from the colorized source files to the API + docs! + +* In summaries, use the first *sentence* of a docstring if the first + line is not followed by a blank line. + + +reStructuredText Parser +======================= + +Also see the `... Or Not To Do?`__ list. + +__ rst/alternatives.html#or-not-to-do + +Bugs +---- + +* A container directive with ``:class:`` option gets the spurious + class value "class". 
+ +Misc +---- + +* Another list problem:: + + * foo + * bar + * baz + + This ends up as a definition list. This is more of a usability + issue. + +* This case is probably meant to be a nested list, but it ends up as a + list inside a block-quote without an error message:: + + - foo + + - bar + + It should probably just be an error. + + The problem with this is that you don't notice easily in HTML that + it's not a nested list but a block-quote -- there's not much of a + visual difference. + +* Treat enumerated lists that are not arabic and consist of only one + item in a single line as ordinary paragraphs. See + . + +* The citation syntax could use some improvements. See + (and the + sub-thread at + , + and the follow-ups at + , + , + ), + , + , + , + , + . + +* The current list-recognition logic has too many false positives, as + in :: + + * Aorta + * V. cava superior + * V. cava inferior + + Here ``V.`` is recognized as an enumerator, which leads to + confusion. We need to find a solution that resolves such problems + without complicating the spec too much. + + See . + +* Add indirect links via citation references & footnote references. + Example:: + + `Goodger (2005)`_ is helpful. + + .. _Goodger (2005): [goodger2005]_ + .. [goodger2005] citation text + + See . + +* Complain about bad URI characters + (http://article.gmane.org/gmane.text.docutils.user/2046) and + disallow internal whitespace + (http://article.gmane.org/gmane.text.docutils.user/2214). + +* Create ``info``-level system messages for unnecessarily + backslash-escaped characters (as in ``"\something"``, rendered as + "something") to allow checking for errors which silently slipped + through. + +* Add (functional) tests for untested roles. + +* Add test for ":figwidth: image" option of "figure" directive. (Test + code needs to check if PIL is available on the system.) + +* Add support for CJK double-width whitespace (indentation) & + punctuation characters (markup; e.g. double-width "*", "-", "+")?
+ +* Add motivation sections for constructs in spec. + +* Support generic hyperlink references to _`targets in other + documents`? Not in an HTML-centric way, though (it's trivial to say + ``https://www.example.org/doc#name``, and useless in non-HTML + contexts). XLink/XPointer? ``.. baseref::``? See Doc-SIG + 2001-08-10. + +* Implement the header row separator modification to table.el. (Wrote + to Takaaki Ota & the table.el mailing list on 2001-08-12, suggesting + support for "=====" header rows. On 2001-08-17 he replied, saying + he'd put it on his to-do list, but "don't hold your breath".) + +* Fix the parser's indentation handling to conform with the stricter + definition in the spec. (Explicit markup blocks should be strict or + forgiving?) + + .. XXX What does this mean? Can you elaborate, David? + +* Make the parser modular. Allow syntax constructs to be added or + disabled at run-time. Subclassing is probably not enough because it + makes it difficult to apply multiple extensions. + +* Generalize the "doctest block" construct (which is overly + Python-centric) to other interactive sessions? "Doctest block" + could be renamed to "I/O block" or "interactive block", and each of + these could also be recognized as such by the parser: + + - Shell sessions:: + + $ cat example1.txt + A block beginning with a "$ " prompt is interpreted as a shell + session interactive block. As with Doctest blocks, the + interactive block ends with the first blank line, and wouldn't + have to be indented. + + - Root shell sessions:: + + # cat example2.txt + A block beginning with a "# " prompt is interpreted as a root + shell session (the user is or has to be logged in as root) + interactive block. Again, the block ends with a blank line. + + Other standard (and unambiguous) interactive session prompts could + easily be added (such as "> " for WinDOS). + + Tony Ibbs spoke out against this idea (2002-06-14 Doc-SIG thread + "docutils feedback"). 
+ +* Add support for pragma (syntax-altering) directives. + + Some pragma directives could be local-scope unless explicitly + specified as global/pragma using ":global:" options. + +* Support whitespace in angle-bracketed standalone URLs according to + Appendix E ("Recommendations for Delimiting URI in Context") of `RFC + 2396`_. + + .. _RFC 2396: https://www.rfc-editor.org/rfc/rfc2396.txt + +* Use the vertical spacing of the source text to determine the + corresponding vertical spacing of the output? + +* [From Mark Nodine] For cells in simple tables that comprise a + single line, the justification can be inferred according to the + following rules: + + 1. If the text begins at the leftmost column of the cell, + then left justification, ELSE + 2. If the text begins at the rightmost column of the cell, + then right justification, ELSE + 3. Center justification. + + The onus is on the author to make the text unambiguous by adding + blank columns as necessary. There should be a parser setting to + turn off justification-recognition (normally on would be fine). + + Decimal justification? + + All this shouldn't be done automatically. Only when it's requested + by the user, e.g. with something like this:: + + .. table:: + :auto-indent: + + (Table goes here.) + + Otherwise it will break existing documents. + +* Generate a warning or info message for paragraphs which should have + been lists, like this one:: + + 1. line one + 3. line two + +* Generalize the "target-notes" directive into a command-line option + somehow? See docutils-develop 2003-02-13. + +* Allow a "::"-only paragraph (first line, actually) to introduce a + _`literal block without a blank line`? (Idea from Paul Moore.) :: + + :: + This is a literal block + + Is indentation enough to make the separation between a paragraph + which contains just a ``::`` and the literal text unambiguous? 
+ (There's one problem with this concession: If one wants a definition + list item which defines the term "::", we'd have to escape it.) It + would only be reasonable to apply it to "::"-only paragraphs though. + I think the blank line is visually necessary if there's text before + the "::":: + + The text in this paragraph needs separation + from the literal block following:: + This doesn't look right. + +* Add new syntax for _`nested inline markup`? Or extend the parser to + parse nested inline markup somehow? See the `collected notes + `__. + +* Drop the backticks from embedded URIs with omitted reference text? + Should the angle brackets be kept in the output or not? :: + + _ + + Probably not worth the trouble. + +* How about a syntax for alternative hyperlink behavior, such as "open + in a new window" (as in HTML's ````)? + + The MoinMoin wiki uses a caret ("^") at the beginning of the URL + ("^" is not a legal URI character). That could work for both inline + and explicit targets:: + + The `reference docs <^url>`__ may be handy. + + .. _name: ^url + + This may be too specific to HTML. It hasn't been requested very + often either. + +* Add an option to add URI schemes at runtime. + +* _`Segmented lists`:: + + : segment : segment : segment + : segment : segment : very long + segment + : segment : segment : segment + + The initial colon (":") can be thought of as a type of bullet + + We could even have segment titles:: + + :: title : title : title + : segment : segment : segment + : segment : segment : segment + + This would correspond well to DocBook's SegmentedList. Output could + be tabular or "name: value" pairs, as described in DocBook's docs. + +* Enable grid _`tables inside XML comments`, where "``--``" ends comments. + + Implementation possibilities: + + 1. Make the table syntax characters into "table" directive options. + This is the most flexible but most difficult, and we probably + don't need that much flexibility. + + 2. 
Substitute "~" for "-" with a specialized directive option + (e.g. ":tildes:"). + + 3. Make the standard table syntax recognize "~" as well as "-", even + without a directive option. Individual tables would have to be + internally consistent. + + 4. Allow Unicode box characters for table markup + (`feature request [6]`_) + + Directive options are preferable to configuration settings, because + tables are document-specific. A pragma directive would be another + approach, to set the syntax once for a whole document. + + Unicode box character markup would kill two birds with one stone. + + In the meantime, the list-table_ directive is a good replacement for + grid tables inside XML comments. + + .. _feature request [6]: + https://sourceforge.net/p/docutils/feature-requests/6 + .. _list-table: ../ref/rst/directives.html#list-table + + +* Generalize docinfo contents (bibliographic fields): remove specific + fields, and have only a single generic "field"? + +* _`Line numbers` and "source" in system messages: + + - Add "source" and "line" keyword arguments to all Reporter calls? + This would require passing source/line arguments along all + intermediate functions (where currently only `line` is used). + + Or rather specify "line" only if actually needed? + + Currently, `document.reporter` uses a state machine instance to + determine the "source" and "line" info from + `statemachine.input_lines` if not given explicitly. Except for + special cases, the "line" argument is not needed because, + `document.statemachine` keeps record of the current line number. + + - For system messages generated after the parsing is completed (i.e. by + transforms or the writer) "line" info must be present in the doctree + elements. + + Elements' .line assignments should be checked. (Assign to .source + too? Add a set_info method? To what?) 
+ + The "source" (and line number in the source) can either be added + explicitly to the elements or determined from the “raw” line + number by `document.statemachine.get_source_and_line`. + + - Some line numbers in elements are not being set properly + (explicitly), just implicitly/automatically. See rev. 1.74 of + docutils/parsers/rst/states.py for an example of how to set. + + - The line numbers of definition list items are wrong:: + + $ rst2pseudoxml.py --expose-internal-attribute line + 1 + 2 + 3 + + 5 + 6 + 7 + + + + + + 1 + + + 2 + 3 + + + 5 + + + 6 + 7 + +* .. _none source: + + Quite a few nodes are getting a "None" source attribute as well. In + particular, see the bodies of definition lists. + + +Adaptable file extensions +------------------------- + +Questions +````````` + +Should Docutils support adaptable file extensions in hyperlinks? + + In the rST source, sister documents are ".txt" files. If we're + generating HTML, then ".html" is appropriate; if PDF, then ".pdf"; + etc. + +Handle documents only, or objects (images, etc.) also? + + Different output formats support different sets of image formats (HTML + supports ".svg" but not ".pdf", pdfLaTeX supports ".pdf" but not ".svg", + LaTeX supports only ".eps"). + + This is less urgent 2020 than 2004, as `pdflatex` and `lualatex` are + now standard and support most image formats. Also, a wrapper like + `rubber`__ that provides on-the-fly image conversion depends on the + "wrong" extension in the LaTeX source. + + __ https://pypi.org/project/rubber/ + +At what point should the extensions be substituted? + + Transforms_: + Fits well in the `Reader → Transformer → Writer`__ processing framework. + + * Filename/URL extension replacement can be done walking over the + Document tree transforming the document tree from a valid state + to another valid state. + + * Writer-specific configuration is still possible in the + respective sections of the configuration_ file. 
+ + __ ../peps/pep-0258.html#id24 + + Pre- or post-processing: + Can be implemented independent of Docutils -- keeps Docutils simple. + + ... those who need more sophisticated filename extension + tweaking can simply use regular expressions, which isn't too + difficult due to the determinability of the writers. So there + is no need to add a complex filename-extension-handling feature + to Docutils. + + --- `Lea Wiemann in docutils-users 2004-06-04`__ + + __ https://sourceforge.net/p/docutils/mailman/message/6918089/ + + +Proposals +````````` + +How about using ".*" to indicate "choose the most appropriate filename +extension"? For example:: + + .. _Another Document: another.* + +* My point about using ``.*`` is that any other mechanism inside reST + leads to too many ambiguities in reading reST documents; at least + with ``.*`` it's clear that some kind of substitution is going on. + + --- Aahz + +* What is to be done for output formats that don't *have* hyperlinks? + For example, LaTeX targeted at print. Hyperlinks may be "called + out", as footnotes with explicit URLs. (Don't convert the links.) + + But then there's also LaTeX targeted at PDFs, which *can* have + links. Perhaps a runtime setting for "*" could explicitly provide + the extension, defaulting to the output file's extension. + +* If this handles images also, how to differentiate between document + and image links? Element context (within "image")? Which image + extension to use for which document format? For HTML output, there + is no reliable way of determining which extension to use (svg, png, + jpg, jpeg, gif, ...). + + Should the system check for existing files? No, not practical (the + image files may be not available when the document is processed to HTML). + + Mailing list threads: `Images in both HTML and LaTeX`__ (especially + `this summary of Lea's objections`__). 
+ + __ https://sourceforge.net/p/docutils/mailman/docutils-users/thread/40BAA4B7.5020801%40python.org/#msg6918066 + __ https://sourceforge.net/p/docutils/mailman/message/6918089/ + +Chris Liechti suggests a new ``:link:`` role in `more-universal +links?`__:: + + .. role:: link(rewrite) + :transform: .txt|.html + + and then to use it:: + + for more information see :link:`README.txt` + + it would be useful if it supported an additional option + ``:format: html`` so that separate rules for each format can be + defined. (like the "raw" role) + +__ https://sourceforge.net/p/docutils/mailman/message/6919484/ + + +Idea from Jim Fulton: an external lookup table of targets: + + I would like to specify the extension (e.g. .txt) [in the + source, rather than ``filename.*``], but tell the converter to + change references to the files anticipating that the files will + be converted too. + + For example:: + + .. _Another Document: another.txt + + rst2html.py --convert-links "another.txt bar.txt" foo.txt + + That is, name the files for which extensions should be converted. + + Note that I want to refer to original files in the original text + (another.txt rather than another.*) because I want the + unconverted text to stand on its own. + + Note that in most cases, people will be able to use globs:: + + rst2html.py --convert-link-extensions-for "`echo *.txt`" foo.txt + + It might be nice to be able to use multiple arguments, as in:: + + rst2html.py --convert-link-extensions-for *.txt -- foo.txt + + > Handle documents only, or objects (images, etc.) also? + + No, documents only, but there really is no need for guesswork. + Just get the file names as command-line arguments. EIBTI + [explicit is better than implicit]. + +In `Patch #169`__ `Hyperlink extension rewriting`, John L. 
Clark +suggests command line options that map to-be-changed file extensions, e.g.:: + + rst2html --map-extension rst html --map-extension jpg png \ + input-filename.rst + +__ https://sourceforge.net/p/docutils/patches/169/ + + Specifying the mapping as regular expressions would make this + approach more generic and easier to implement (use ``re.replace`` + and refer to the "re" module's documentation instead of coding and + documenting a home-grown extraction and mapping procedure). + + +Math Markup +----------- + +Since Docutils 0.8, a "math" role and directive using LaTeX math +syntax as input format is part of reStructuredText. + +Open issues: + +* Use a "Transform" for math format conversions as extensively discussed in + the "math directive issues" thread in May 2008 + (http://osdir.com/ml/text.docutils.devel/2008-05/threads.html)? + +* Generic `math-output setting`_ (currently specific to HTML). + (List of math-output preferences?) + +* Try to be compatible with `Math support in Sphinx`_? + + * The ``:label:`` option selects a label for the equation, by which it + can be cross-referenced, and causes an equation number to be issued. + In Docutils, the option ``:name:`` sets the label. + Equation numbering is not implemented yet. + + * Option ``:nowrap:`` prevents wrapping of the given math in a + math environment (you have to specify the math environment in the + content). + + .. _Math support in Sphinx: http://sphinx.pocoo.org/ext/math.html + +* Equation numbering and references. (see the section on + `object numbering and object references` for equations, + formal tables, and images.) + +.. _math-output setting: ../user/config.html#math-output + + +alternative input formats +````````````````````````` + +Use a directive option to specify an alternative input format, e.g. 
(but not +limited to): + +MathML_ + Not for hand-written code but maybe useful when pasted in (or included + from a file) + + For an overview of MathML implementations and tests, see, e.g., + the `mathweb wiki`_ or the `ConTeXT MathML page`_. + + .. _MathML: https://www.w3.org/TR/MathML2/ + .. _mathweb wiki: http://www.mathweb.org/wiki/MathML + .. _ConTeXT MathML page: http://wiki.contextgarden.net/MathML + + A MathML to LaTeX XSLT sheet: + https://github.com/davidcarlisle/web-xslt/tree/master/pmml2tex + + +ASCIIMath_ + Simple, ASCII based math input language (see also `ASCIIMath tutorial`_). + + * The Python module ASCIIMathML_ translates a string with ASCIIMath into a + MathML tree. Used, e.g., by MultiMarkdown__. + + A more comprehensive implementation is ASCIIMathPython_ by + Paul Trembley (also used in his sandbox projects). + + * For conversion to LaTeX, there is + + - a JavaScript script at + http://dlippman.imathas.com/asciimathtex/ASCIIMath2TeX.js + + - The javascript `asciimath-to-latex` AsciiMath to LaTex converter at + the node package manager + https://www.npmjs.com/package/asciimath-to-latex + and at GitHub https://github.com/tylerlong/asciimath-to-latex + + - a javascript and a PHP converter script at GitHub + https://github.com/asciimath/asciimathml/tree/master/asciimath-based + + .. _ASCIIMath: http://www1.chapman.edu/~jipsen/mathml/asciimath.html + .. _ASCIIMath tutorial: + http://www.wjagray.co.uk/maths/ASCIIMathTutorial.html + .. _ASCIIMathML: http://pypi.python.org/pypi/asciimathml/ + .. _ASCIIMathPython: https://github.com/paulhtremblay/asciimathml + __ http://fletcherpenney.net/multimarkdown/ + +`Unicode Nearly Plain Text Encoding of Mathematics`_ + format for lightly marked-up representation of mathematical + expressions in Unicode. + + (Unicode Technical Note. Sole responsibility for its contents rests + with the author(s). Publication does not imply any endorsement by + the Unicode Consortium.) + + .. 
_Unicode Nearly Plain Text Encoding of Mathematics: + https://www.unicode.org/notes/tn28/ + +itex + See `the culmination of a relevant discussion in 2003 + `__. + + + +LaTeX output +```````````` + +Which equation environments should be supported by the math directive? + +* one line: + + + numbered: `equation` + + unnumbered: `equation*` + +* multiline (test for ``\\`` outside of a nested environment + (e.g. `array` or `cases`) + + + numbered: `align` (number every line) + + (To give one common number to all lines, put them in a `split` + environment. Docutils then places it in an `equation` environment.) + + + unnumbered: `align*` + + + Sphinx math also supports `gather` (checking for blank lines in + the content). Docutils puts content blocks separated by blank + lines in separate math-block doctree nodes. (The only difference of + `gather` to two consecutive "normal" environments seems to be that + page-breaks between the two are prevented.) + +See http://www.math.uiuc.edu/~hildebr/tex/displays.html. + + +HTML output +``````````` + +There is no native math support in HTML. +For supported math output variants see the `math-output setting`_. +Add more/better alternatives? + +MathML_ + Converters from LaTeX to MathML include + + * TtM_ (C), ``--math-output=MathML ttm``, undocumented, may be removed. + + No "matrix", "align" and "cases" environments. + + * MathToWeb_ (Java) + * TeX4ht_ (TeX based) + * itex_ (also `used in Abiword`__) + * `Steve’s LATEX-to-MathML translator`_ + ('mini-language', javascript, Python) + * `MathJax for Node`_ + + * Write a new converter? E.g. based on: + + * a generic tokenizer (see e.g. a `latex-codec recipe`_, + `updated latex-codec`_, ) + * the Unicode-Char <-> LaTeX mappings database unimathsymbols_ + + __ http://msevior.livejournal.com/26377.html + .. _MathML: https://www.w3.org/TR/MathML2/ + .. _ttm: http://hutchinson.belmont.ma.us/tth/mml/ + .. _TeX4ht: http://www.tug.org/applications/tex4ht/mn.html + .. 
_MathToWeb: http://www.mathtoweb.com/ + .. _itex: http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html + .. _Steve’s LATEX-to-MathML translator: + http://www.gold-saucer.org/mathml/greasemonkey/dist/display-latex + .. _latex-codec recipe: + http://code.activestate.com/recipes/252124-latex-codec/ + .. _updated latex-codec: + http://mirror.ctan.org/biblio/bibtex/utils/mab2bib/latex.py + .. _unimathsymbols: http://milde.users.sourceforge.net/LUCR/Math/ + .. _MathJax for Node: https://github.com/mathjax/MathJax-node + +.. URL seems down: + .. _itex: http://pear.math.pitt.edu/mathzilla/itex2mmlItex.html + + +HTML/CSS + format math in standard HTML enhanced by CSS rules + (Overview__, `Examples and experiments`__). + The ``math-output=html`` option uses the converter from eLyXer_ + (included with Docutils). + + Alternatives: LaTeX-math to HTML/CSS converters include + + * TtH_ (C) + * Hevea_ (Objective Caml) + * `MathJax for Node`_ + * KaTeX_ + + __ http://www.cs.tut.fi/~jkorpela/math/ + __ http://www.zipcon.net/~swhite/docs/math/math.html + .. _elyxer: http://elyxer.nongnu.org/ + .. _TtH: http://hutchinson.belmont.ma.us/tth/index.html + .. _Hevea: http://para.inria.fr/~maranget/hevea/ + .. _KaTeX: https://katex.org + +images + (PNG or SVG) like e.g. Wikipedia. + + * dvisvgm_ + * the pure-python MathML->SVG converter SVGMath_ + * `MathJax for Node`_ + + .. _dvisvgm: http://dvisvgm.sourceforge.net/ + .. _SVGMath: http://www.grigoriev.ru/svgmath/ + + +client side JavaScript conversion + Use TeX notation in the web page and JavaScript in the displaying browser. + (implemented as `math-output setting`_ "mathjax"). + + * jqMath_ (faster and lighter than MathJax_) + + .. _MathJax: http://www.mathjax.org/ + ..
_jqMath: http://mathscribe.com/author/jqmath.html + +OpenOffice output +````````````````` + +* The `OpenDocument standard`_ version 1.1 says: + + Mathematical content is represented by MathML 2.0 + + However, putting MathML into an ODP file seems tricky as these + (maybe outdated) links suppose: + http://idippedut.dk/post/2008/01/25/Do-your-math-ODF-and-MathML.aspx + http://idippedut.dk/post/2008/03/03/Now-I-get-it-ODF-and-MathML.aspx + + .. _OpenDocument standard: + http://www.oasis-open.org/standards#opendocumentv1.1 + +* OOoLaTeX__: "a set of macros designed to bring the power of LaTeX + into OpenOffice." + + __ http://ooolatex.sourceforge.net/ + + +Directives +---------- + +Directives below are often referred to as "module.directive", the +directive function. The "module." is not part of the directive name +when used in a document. + +* Allow for field lists in list tables. See + . + +* .. _unify tables: + + Unify table implementations and unify options of table directives + (http://article.gmane.org/gmane.text.docutils.user/1857). + +* Allow directives to be added at run-time? + +* Use the language module for directive option names? + +* Add "substitution_only" and "substitution_ok" function attributes, + and automate context checking? + +* Implement options or features on existing directives: + + - All directives that produce titled elements should grow implicit + reference names based on the titles. + + - Allow the _`:trim:` option for all directives when they occur in a + substitution definition, not only the unicode_ directive. + + .. _unicode: ../ref/rst/directives.html#unicode-character-codes + + - Add the "class" option to the unicode_ directive. For example, you + might want to get characters or strings with borders around them. + + - _`images.figure`: "title" and "number", to indicate a formal + figure? 
+ + - _`parts.sectnum`: "local"?, "refnum" + + A "local" option could enable numbering for sections from a + certain point down, and sections in the rest of the document are + not numbered. For example, a reference section of a manual might + be numbered, but not the rest. OTOH, an all-or-nothing approach + would probably be enough. + + The "sectnum" directive should be usable multiple times in a + single document. For example, in a long document with "chapter" + and "appendix" sections, there could be a second "sectnum" before + the first appendix, changing the sequence used (from 1,2,3... to + A,B,C...). This is where the "local" concept comes in. This part + of the implementation can be left for later. + + A "refnum" option (better name?) would insert reference names + (targets) consisting of the reference number. Then a URL could be + of the form ``http://host/document.html#2.5`` (or "2-5"?). Allow + internal references by number? Allow name-based *and* + number-based ids at the same time, or only one or the other (which + would the table of contents use)? Usage issue: altering the + section structure of a document could render hyperlinks invalid. + + - _`parts.contents`: Add a "suppress" or "prune" option? It would + suppress contents display for sections in a branch from that point + down. Or a new directive, like "prune-contents"? + + Add an option to include topics in the TOC? Another for sidebars? + The "topic" directive could have a "contents" option, or the + "contents" directive could have an "include-topics" option. See + docutils-develop 2003-01-29. + + - _`parts.header` & _`parts.footer`: Support multiple, named headers + & footers? For example, separate headers & footers for odd, even, + and the first page of a document. + + This may be too specific to output formats which have a notion of + "pages". + + - _`misc.class`: + + - Add a ``:parent:`` option for setting the parent's class + (http://article.gmane.org/gmane.text.docutils.devel/3165).
+ + - _`misc.include`: + + - Option to label lines? + + - How about an environment variable, say RSTINCLUDEPATH or + RSTPATH, for standard includes (as in ``.. include:: ``)? + This could be combined with a setting/option to allow + user-defined include directories. + + - Add support for inclusion by URL? :: + + .. include:: + :url: https://www.example.org/inclusion.txt + + - Strip blank lines from begin and end of a literal included file or + file section. This would correspond to the way a literal block is + handled. + + As nodes.literal_block expects (and we have) the text as a string + (rather than a list of lines), using a regexp seems the way. + + - _`misc.raw`: add a "destination" option to the "raw" directive? :: + + .. raw:: html + :destination: head + + + + It needs thought & discussion though, to come up with a consistent + set of destination labels and consistent behavior. + + And placing HTML code inside the element of an HTML + document is rather the job of a templating system. + + - _`body.sidebar`: Allow internal section structure? Adornment + styles would be independent of the main document. + + That is really complicated, however, and the document model + greatly benefits from its simplicity. + +* Implement directives. Each of the list items below begins with an + identifier of the form, "module_name.directive_function_name". The + directive name itself could be the same as the + directive_function_name, or it could differ. + + - _`html.imagemap` + + It has the disadvantage that it's only easily implementable for + HTML, so it's specific to one output format. + + (For non-HTML writers, the imagemap would have to be replaced with + the image only.) + + - _`parts.endnotes` (or "footnotes"): See `Footnote & Citation Gathering`_. + + - _`parts.citations`: See `Footnote & Citation Gathering`_. + + - _`misc.language`: Specify (= change) the language of a document at + parse time? 
+ + * The misc.settings_ directive suggested below offers a more generic + approach. + + * The language of document parts can be indicated by the "special class + value" ``"language-"`` + `BCP 47`_ language code. Class arguments to + the title are attached to the document's base node - hence titled + documents can be given a different language at parse time. However, + "language by class attribute" does not change parsing (localized + directives etc.), only supporting writers. + + .. _BCP 47: https://www.rfc-editor.org/rfc/bcp/bcp47.txt + + + - _`misc.settings`: Set any(?) Docutils runtime setting from within + a document? Needs much thought and discussion. + + Security concerns need to be taken into account (it shouldn't be + possible to enable ``file_insertion_enabled`` from within a + document), and settings that only would have taken effect before + the directive (like ``tab-width``) shouldn't be accessible either. + + See this sub-thread: + + + - _`misc.gather`: Gather (move, or copy) all instances of a specific + element. A generalization of the `Footnote & Citation Gathering`_ + ideas. + + - Add a custom "directive" directive, equivalent to "role"? For + example:: + + .. directive:: incr + + .. class:: incremental + + .. incr:: + + "``.. incr::``" above is equivalent to "``.. class:: incremental``". + + Another example:: + + .. directive:: printed-links + + .. topic:: Links + :class: print-block + + .. target-notes:: + :class: print-inline + + This acts like macros. The directive contents will have to be + evaluated when referenced, not when defined. + + * Needs a better name? "Macro", "substitution"? + * What to do with directive arguments & options when the + macro/directive is referenced? + + - Make the meaning of block quotes overridable? Only a 1-shot + though; doesn't solve the general problem. + + - _`conditional directives`: + + .. note:: See also the implementation in Sphinx_. 
+ + Docutils already has the ability to say "use this content for + Writer X" via the "raw" directive. It also does have the ability + to say "use this content for any Writer other than X" via the + "strip-elements with class" config value. However, using "raw" + input just to select a special writer is inconvenient in many + cases. + It wouldn't be difficult to get more straightforward support, though. + + My first idea would be to add a set of conditional directives. + Let's call them "writer-is" and "writer-is-not" for discussion + purposes (don't worry about implementation details). We might + have:: + + .. writer-is:: text-only + + :: + + +----------+ + | SNMP | + +----------+ + | UDP | + +----------+ + | IP | + +----------+ + | Ethernet | + +----------+ + + .. writer-is:: pdf + + .. figure:: protocol_stack.eps + + .. writer-is-not:: text-only pdf + + .. figure:: protocol_stack.png + + This could be an interface to the Filter transform + (docutils.transforms.components.Filter). + + The ideas in the `adaptable file extensions`_ section above may + also be applicable here. + + SVG's "switch" statement may provide inspiration. + + Here's an example of a directive that could produce multiple + outputs (*both* raw troff pass-through *and* a GIF, for example) + and allow the Writer to select. :: + + .. eqn:: + + .EQ + delim %% + .EN + %sum from i=o to inf c sup i~=~lim from {m -> inf} + sum from i=0 to m sup i% + .EQ + delim off + .EN + + - _`body.example`: Examples; suggested by Simon Hefti. Semantics as + per Docbook's "example"; admonition-style, numbered, reference, + with a caption/title. + + - _`body.index`: Index targets. + + See `Index Entries & Indexes + <./rst/alternatives.html#index-entries-indexes>`__. + + - _`body.literal`: Literal block, possibly "formal" (see `object + numbering and object references`_ above). Possible options: + + - "highlight" a range of lines + + - include only a specified range of lines + + - "number" or "line-numbers"? 
(since 0.9 available with "code" directive) + + - "styled" could indicate that the directive should check for + style comments at the end of lines to indicate styling or + markup. + + Specific derivatives (i.e., a "python-interactive" directive) + could interpret style based on cues, like the ">>> " prompt and + "input()"/"raw_input()" calls. + + See docutils-users 2003-03-03. + + - _`body.listing`: Code listing with title (to be numbered + eventually), equivalent of "figure" and "table" directives. + + - _`pysource.usage`: Extract a usage message from the program, + either by running it at the command line with a ``--help`` option + or through an exposed API. [Suggestion for Optik.] + + - _`body.float`: Generic float that can be used for figures, tables, + code listings, flowcharts, ... + + There is a Sphinx extension by Ignacio Fernández Galván + + I implemented something for generic floats in sphinx, and submitted a + pull request that is still waiting:: + + .. float:: + :type: figure + :caption: My caption + + https://github.com/sphinx-doc/sphinx/pull/1858 + + +Interpreted Text +---------------- + +Interpreted text is entirely a reStructuredText markup construct, a +way to get around built-in limitations of the medium. Some roles are +intended to introduce new doctree elements, such as "title-reference". +Others are merely convenience features, like "RFC". + +All supported interpreted text roles must already be known to the +Parser when they are encountered in a document. Whether pre-defined +in core/client code, or in the document, doesn't matter; the roles +just need to have already been declared. Adding a new role may +involve adding a new element to the DTD and may require extensive +support, therefore such additions should be well thought-out. There +should be a limited number of roles. + +The only place where no limit is placed on variation is at the start, +at the Reader/Parser interface. 
Transforms are inserted by the Reader +into the Transformer's queue, where non-standard elements are +converted. Once past the Transformer, no variation from the standard +Docutils doctree is possible. + +An example is the Python Source Reader, which will use interpreted +text extensively. The default role will be "Python identifier", which +will be further interpreted by namespace context into , +, , , etc. elements (see pysource.dtd), +which will be transformed into standard hyperlink references, which +will be processed by the various Writers. No Writer will need to have +any knowledge of the Python-Reader origin of these elements. + +* Add explicit interpreted text roles for the rest of the implicit + inline markup constructs: named-reference, anonymous-reference, + footnote-reference, citation-reference, substitution-reference, + target, uri-reference (& synonyms). + +* Add directives for each role as well? This would allow indirect + nested markup:: + + This text contains |nested inline markup|. + + .. |nested inline markup| emphasis:: + + nested ``inline`` markup + +* Implement roles: + + - "_`raw-wrapped`" (or "_`raw-wrap`"): Base role to wrap raw text + around role contents. + + For example, the following reStructuredText source ... :: + + .. role:: red(raw-formatting) + :prefix: + :html: + :latex: {\color{red} + :suffix: + :html: + :latex: } + + colored :red:`text` + + ... will yield the following document fragment:: + + + colored + + + + + {\color{red} + + text + + + + } + + Possibly without the intermediate "inline" node. + + - _`"acronym" and "abbreviation"`: Associate the full text with a + short form. Jason Diamond's description: + + I want to translate ```reST`:acronym:`` into ``reST``. The value of the + title attribute has to be defined out-of-band since you can't + parameterize interpreted text. 
Right now I have them in a + separate file but I'm experimenting with creating a directive + that will use some form of reST syntax to let you define them. + + Should Docutils complain about undefined acronyms or + abbreviations? + + What to do if there are multiple definitions? How to + differentiate between CSS (Content Scrambling System) and CSS + (Cascading Style Sheets) in a single document? David Priest + responds, + + The short answer is: you don't. Anyone who did such a thing + would be writing very poor documentation indeed. (Though I + note that `somewhere else in the docs`__, there's mention of + allowing replacement text to be associated with the + abbreviation. That takes care of the duplicate + acronyms/abbreviations problem, though a writer would be + foolish to ever need it.) + + __ `inline parameter syntax`_ + + How to define the full text? Possibilities: + + 1. With a directive and a definition list? :: + + .. acronyms:: + + reST + reStructuredText + DPS + Docstring Processing System + + Would this list remain in the document as a glossary, or would + it simply build an internal lookup table? A "glossary" + directive could be used to make the intention clear. + Acronyms/abbreviations and glossaries could work together. + + Then again, a glossary could be formed by gathering individual + definitions from around the document. + + 2. Some kind of `inline parameter syntax`_? :: + + `reST `:acronym: is `WYSIWYG `:acronym: plaintext markup. + + .. _inline parameter syntax: + rst/alternatives.html#parameterized-interpreted-text + + 3. A combination of 1 & 2? + + The multiple definitions issue could be handled by establishing + rules of priority. For example, directive-based lookup tables + have highest priority, followed by the first inline definition. + Multiple definitions in directive-based lookup tables would + trigger warnings, similar to the rules of `implicit hyperlink + targets`__. 
+ + __ ../ref/rst/restructuredtext.html#implicit-hyperlink-targets + + 4. Using substitutions? :: + + .. |reST| acronym:: reST + :text: reStructuredText + + What do we do for other formats than HTML which do not support + tool tips? Put the full text in parentheses? + + - "figure", "table", "listing", "chapter", "page", etc: See `object + numbering and object references`_ above. + + - "glossary-term": This would establish a link to a glossary. It + would require an associated "glossary-entry" directive, whose + contents could be a definition list:: + + .. glossary-entry:: + + term1 + definition1 + term2 + definition2 + + This would allow entries to be defined anywhere in the document, + and collected (via a "glossary" directive perhaps) at one point. + + +Doctree pruning +--------------- + +[DG 2017-01-02: These are not definitive to-dos, just one developer's +opinion. Added 2009-10-13 by Günter Milde, in r6178.] +[Updated by GM 2017-02-04] + +The number of doctree nodes can be reduced by "normalizing" some related +nodes. This makes the document model and the writers somewhat simpler. + +* The "doctest" element can be replaced by literal blocks with a class + attribute (similar to the "code" directive output). + The syntax shall be left in reST. + + [DG 2017-01-02:] +0. + + Discussion + The syntax could be left in reST (for a set period of time?). + + [DG 2017-01-02:] The syntax must be left in reST, practically + forever. Removing it would introduce a huge backwards + incompatibility. Any syntax removal must be preceded by a thorough + review and planning, including a deprecation warning process. My + opinion: it's not worth it. + +* "Normalize" special admonitions (note, hint, warning, ...) during parsing + (similar to _`transforms.writer_aux.Admonitions`). There is no need to + keep them as distinct elements in the doctree specification. 
+ + [DG 2017-01-02:] -1: <note>{body}</note> is much more concise and + expressive than <admonition><title>Note</>{body}</>, and the title + translation can be put off until much later in the process. + + [GM 2017-02-04]: + + -0 for <admonition class=note><title>Note</>... instead of <note>: + a document is rarely printed/used as doctree or XML. + + +1 reduce the complexity of the doctree + (there is no 1:1 rST syntax element <-> doctree node mapping anyway). + + +2 every writer needs 9 visit_*/depart_* method pairs to handle the 9 + subtypes of an admonition, i.e. we could also remove 36 redundant + methods (HTML, LaTeX, Manpage, ODF). + + -1 the most unfortunately named of these directives will survive. [#]_ + + .. [#] with "biblical touch" and hard to translate: + + :admonition: | Ermahnung; Verweis; Warnung; Rüge + | (exhortation; censure; warning; reprimand, rebuke) + + + Keep the special admonition directives in reStructuredText syntax. + + [DG 2017-01-02:] We must definitely keep the syntax. Removing it + would introduce a huge backwards incompatibility. + + +Unimplemented Transforms +======================== + +* _`Footnote & Citation Gathering` + + Collect and move footnotes & citations to the end of a document or the + place of a "footnotes" or "citations" directive + (see `<./ref/rst/directives.html>`_) + + Footnotes: + Collect all footnotes that are referenced in the document before the + directive (and after a possibly preceding ``.. footnotes::`` + directive) and insert at this place. + + Allows "endnotes" at a configurable place. + + Citations: + Collect citations that are referenced ... + + Citations can be: + + a) defined in the document as citation elements + + b) auto-generated from entries in a bibliographic database. + + + based on bibstuff_? + + also have a look at + + * CrossTeX_, a backwards-compatible, improved bibtex + re-implementation in Python (including HTML export). 
+ (development has been stalled for 2 years) + + * Pybtex_, a drop-in replacement for BibTeX written in Python. + + * BibTeX styles & (experimental) pythonic style API. + * Database in BibTeX, BibTeXML and YAML formats. + * full Unicode support. + * Write to TeX, HTML and plain text. + + * `Zotero plain <http://e6h.org/%7Eegh/hg/zotero-plain/>`__ + supports Zotero databases and CSL_ styles with Docutils with an + ``xcite`` role. + + * `sphinxcontrib-bibtex`_ Sphinx extension with "bibliography" + directive and "cite" role supporting BibTeX databases. + + * `Modified rst2html + <http://www.loria.fr/~rougier/coding/article/rst2html.py>`__ by + Nicolas Rougier. + + + * Automatically insert a "References" heading? + +.. _CrossTeX: http://www.cs.cornell.edu/people/egs/crosstex/ +.. _Pybtex: http://pybtex.sourceforge.net/ +.. _CSL: http://www.citationstyles.org/ +.. _sphinxcontrib-bibtex: http://sphinxcontrib-bibtex.readthedocs.org/ + +* _`Reference Merging` + + When merging two or more subdocuments (such as docstrings), + conflicting references may need to be resolved. There may be: + + * duplicate reference and/or substitution names that need to be made + unique; and/or + * duplicate footnote numbers that need to be renumbered. + + Should this be done before or after reference-resolving transforms + are applied? What about references from within one subdocument to + inside another? + +* _`Document Splitting` + + If the processed document is written to multiple files (possibly in + a directory tree), it will need to be split up. Internal references + will have to be adjusted. + + (HTML only? Initially, yes. Eventually, anything should be + splittable.) + + Ideas: + + - Insert a "destination" attribute into the root element of each + split-out document, containing the path/filename. The Output + object or Writer will recognize this attribute and split out the + files accordingly. Must allow for common headers & footers, + prev/next, breadcrumbs, etc. 
+ + - Transform a single-root document into a document containing + multiple subdocuments, recursively. The content model of the + "document" element would have to change to:: + + <!ELEMENT document + ( (title, subtitle?)?, + decoration?, + (docinfo, transition?)?, + %structure.model;, + document* )> + + (I.e., add the last line -- 0 or more document elements.) + + Let's look at the case of hierarchical (directories and files) + HTML output. Each document element containing further document + elements would correspond to a directory (with an index.html file + for the content preceding the subdocuments). Each document + element containing no subdocuments (i.e., structure model elements + only) corresponds to a concrete file with no directory. + + The natural transform would be to map sections to subdocuments, + but possibly only a given number of levels deep. + +* _`Navigation` + + If a document is split up, each segment will need navigation links: + parent, children (small TOC), previous (preorder), next (preorder). + Part of `Document Splitting`_? + +* _`List of System Messages` + + The ``system_message`` elements are inserted into the document tree, + adjacent to the problems themselves where possible. Some (those + generated post-parse) are kept until later, in + ``document.messages``, and added as a special final section, + "Docutils System Messages". + + Docutils could be made to generate hyperlinks to all known + system_messages and add them to the document, perhaps to the end of + the "Docutils System Messages" section. + + Fred L. Drake, Jr. wrote: + + I'd like to propose that both parse- and transformation-time + messages are included in the "Docutils System Messages" section. + If there are no objections, I can make the change. + + The advantage of the current way of doing things is that parse-time + system messages don't require a transform; they're already in the + document. This is valuable for testing (unit tests, + tools/quicktest.py). 
So if we do decide to make a change, I think + the insertion of parse-time system messages ought to remain as-is + and the Messages transform ought to move all parse-time system + messages (remove from their originally inserted positions, insert in + System Messages section). + +* _`Index Generation` + + +HTML Writer +=========== + +* Make the _`list compacting` logic more generic: For example, allow + for literal blocks or line blocks inside of compact list items. + + This is not implementable as long as list compacting is done by + omitting ``<p>`` tags. List compacting would need to be done by + adjusting CSS margins instead. + + :2015-04-02: The new html writer no longer strips <p> tags but adds the + class value ``simple`` to the list. + Formatting is done by CSS --- configurable by a custom style + sheet. + + Auto-compactization can be overridden by the ``open`` vs. + ``compact`` class arguments. + +* Idea for field-list rendering: hanging indent:: + + Field name (bold): First paragraph of field body begins + with the field name inline. + + If the first item of a field body is not a paragraph, + it would begin on the following line. + + :2015-04-02: The new html writer writes field-lists as definition lists + with class ``field-list``. + Formatting is done by CSS --- configurable by a custom style + sheet. The default style sheet has some examples, including a + run-in field-list style. + +* Add more support for <link> elements, especially for navigation + bars. + + The framework does not have a notion of document relationships, so + probably raw.destination_ should be used. + + We'll have framework support for document relationships when support + for `multiple output files`_ is added. The HTML writer could + automatically generate <link> elements then. + + .. _raw.destination: misc.raw_ + +* Base list compaction on the spacing of source list? Would require + parser support. (Idea: fantasai, 16 Dec 2002, doc-sig.) + +* Add a tool tip ("title" attribute?) 
to footnote back-links + identifying them as such. Text in Docutils language module. + + +PEP/HTML Writer +=============== + +* Remove the generic style information (duplicated from html4css1.css) + from pep.css to avoid redundancy. + + Set ``stylesheet-path`` to "html4css.css,pep.css" and the + ``stylesheet-dirs`` accordingly instead. (See the xhtml11 writer for an + example.) + + +S5/HTML Writer +============== + +* Add a way to begin an untitled slide. + +* Add a way to begin a new slide, continuation, using the same title + as the previous slide? (Unnecessary?) You need that if you have a + lot of items in one section which don't fit on one slide. + + Maybe either this item or the previous one can be realized using + transitions. + +* Have a timeout on incremental items, so the colour goes away after 1 + second. + +* Add an empty, black last slide (optionally). Currently the handling + of the last slide is not very nice, it re-cycles through the + incremental items on the last slide if you hit space-bar after the + last item. + +* Add a command-line option to disable advance-on-click. + +* Add a speaker's master document, which would contain a small version + of the slide text with speaker's notes interspersed. The master + document could use ``target="whatever"`` to direct links to a + separate window on a second monitor (e.g., a projector). + + .. Note:: This item and the following items are partially + accomplished by the S5 1.2 code (currently in alpha), which has + not yet been integrated into Docutils. + +* Speaker's notes -- how to intersperse? Could use reST comments + (".."), but make them visible in the speaker's master document. If + structure is necessary, we could use a "comment" directive (to avoid + nonsensical DTD changes, the "comment" directive could produce an + untitled topic element). + + The speaker's notes could (should?) be separate from S5's handout + content. 
+ +* The speaker's master document could use frames for easy navigation: + TOC on the left, content on the right. + + - It would be nice if clicking in the TOC frame simultaneously + linked to both the speaker's notes frame and to the slide window, + synchronizing both. Needs JavaScript? + + - TOC would have to be tightly formatted -- minimal indentation. + + - TOC auto-generated, as in the PEP Reader. (What if there already + is a "contents" directive in the document?) + + - There could be another frame on the left (top-left or bottom-left) + containing a single "Next" link, always pointing to the next slide + (synchronized, of course). Also "Previous" link? FF/Rew go to + the beginning of the next/current parent section? First/Last + also? Tape-player-style buttons like ``|<< << < > >> >>|``? + +Epub/HTML Writer +================ + +Add epub as an output format. + + epub is an open file format for ebooks based on HTML, specified by the + `International Digital Publishing Forum`_. Thus, documents in epub + format are suited to be read with `electronic reading devices`_. + +Pack the output of a HTML writer and supporting files (e.g. images) +into one single epub document. + +There are `links to two 3rd party ePub writers`__ in the Docutils link list. +Test and consider moving the better one into the docutils core. + +__ ../user/links.html#ePub +.. _International Digital Publishing Forum: http://www.idpf.org/ +.. _electronic reading devices: + https://en.wikipedia.org/wiki/List_of_e-book_readers + + +LaTeX writer +============ + +Also see the Problems__ section in the `latex writer documentation`_. + +__ ../user/latex.html#problems + +.. _latex writer documentation: ../user/latex.html + +.. _latex-variants: + ../../../sandbox/latex-variants/README.html + +Bug fixes +--------- + +* Too deeply nested lists fail: generate a warning and provide + a workaround. + + 2017-02-09 this is fixed for enumeration in 0.13.1 + + for others, cf. 
sandbox/latex-variants/tests/rst-levels.txt + +* File names of included graphics (see also `grffile` package). + +* Paragraph following field-list or table in compound is indented. + + This is a problem with the current DUfieldlist definition and with the + use of "longtable" for tables. See `other LaTeX constructs and packages + instead of re-implementations`_ for alternatives. + + +Generate clean and configurable LaTeX source +---------------------------------------------- + +Which packages do we want to use? + ++ base and "recommended" packages + + (packages that should be in a "reasonably sized and reasonably modern + LaTeX installation like the `texlive-latex-recommended` Debian package, + say): + ++ No "fancy" or "exotic" requirements. + ++ pointers to advanced packages and their use in the `latex writer + documentation`_. + +Configurable placement of figure and table floats +````````````````````````````````````````````````` + +* Special class argument to individually place figures? + + Example:: + + .. figure:: foo.pdf + :class: place-here-if-possible place-top place-bottom + + would be written as ``\figure[htb]{...}`` with + the optional args: + + :H: place-here + :h: place-here-if-possible + :t: place-top + :b: place-bottom + :p: place-on-extra-page + + Alternative: class value = "place-" + optional arg, e.g. ``:class: + place-htb``. + +Footnotes +````````` + ++ True footnotes with LaTeX auto-numbering (as option ``--latex-footnotes``) + (also for target-footnotes): + Write ``\footnote{<footnote content>}`` at the place of the + ``<footnote_reference>`` node. + ++ Open questions: + + - Load hyperref_ with option "hyperfootnotes" and/or + package footnotebackref_ or leave this to the user? + + - Consider cases where LaTeX does not support footnotes + (inside tables__, headings__, caption, ...). + Use ftnxtra_, tabularx_, tabulary_, longtable_? 
+ + __ http://www.tex.ac.uk/cgi-bin/texfaq2html?label=footintab + __ http://www.tex.ac.uk/cgi-bin/texfaq2html?label=ftnsect + + - Consider `multiple footnote refs to common footnote text`__. + + KOMA-script classes and the KOMA scrextend_ package provide + ``\footref`` that can be used for additional references to a + ``\label``-ed footnote. Since 2021-05-01, ``\footref`` is provided + by the LaTeX core, too. + + __ http://www.tex.ac.uk/cgi-bin/texfaq2html?label=multfoot + + - Consider numbered vs. symbolic footnotes. + ++ document customization (links to how-to and packages) + +.. Footnote packages at CTAN (www.ctan.org/pkg/<packagename>): + + :footnote: provides a "savenotes" environment which collects all + footnotes and emits them at ``\end{savenotes}`` + (texlive-latex-recommended) + + :ftnxtra_: fixes the issue of footnote inside \caption{}, + tabular environment and \section{} like commands. + + :footnotebackref_: bidirectional links to/from footnote mark to + footnote text. + +.. Footnote Discussion: + + `German tutorial + <http://www2.informatik.hu-berlin.de/~ahamann/studies/footnotes.pdf>`__ + + `wikibooks: footnote workarounds + <https://en.wikibooks.org/wiki/LaTeX/Footnotes_and_Margin_Notes#Common_problems_and_workarounds>`__ + +.. _footnotebackref: https://www.ctan.org/pkg/footnotebackref +.. _ftnxtra: https://www.ctan.org/pkg/ftnxtra +.. _hyperref: https://www.ctan.org/pkg/hyperref +.. _longtable: https://www.ctan.org/pkg/longtable +.. _scrextend: https://www.ctan.org/pkg/koma-script +.. _tabularx: https://www.ctan.org/pkg/tabularx + + +Other LaTeX constructs and packages instead of re-implementations +````````````````````````````````````````````````````````````````` + +* Check the generated source with package `nag`. + +* enumitem_ (texlive-latex-extra) for field-lists? + +.. _enumitem: https://www.ctan.org/pkg/enumitem + +Default layout +-------------- + +* Use italic instead of slanted for titlereference? 
+ +* Start a new paragraph after lists (as currently) + or continue (no blank line in source, no parindent in output)? + + Overriding: + + * continue if the `compound paragraph`_ directive is used (as currently), + or + * force a new paragraph with an empty comment. + +* Sidebar handling (environment with `framed`, `marginnote`, `wrapfig`, + ...)? + +* Use optionlist for docinfo? + +* Keep literal-blocks together on a page, avoid pagebreaks. + + Failed experiments up to now: samepage, minipage, pagebreak 1 to 4 before + the block. + + Should be possible with ``--literal-block-env==lstlistings`` and some + configuration... + +* More space between title and subtitle? :: + + - \\ % subtitle% + + \\[0.5em] % subtitle% + +.. _compound paragraph: + ../ref/rst/directives.html#compound-paragraph + +Tables +`````` + +* Improve/simplify logic to set the column width in the output. + + + Assumed reST line length for table width setting configurable, or + + use `ltxtable` (a combination of `tabularx` (auto-width) and + `longtable` (page breaks)), or + + use tabularx column type ``X`` and let LaTeX decide width, or + + use tabulary_? + + .. _tabulary: https://www.ctan.org/pkg/tabulary + +* From comp.text.tex (13. 4. 2011): + + When using fixed width columns, you should ensure that the total + width does not exceed \linewidth: if the first column is p{6cm} + the second one should be p{\dimexpr\linewidth-6cm-4\tabcolsep} + because the glue \tabcolsep is added twice at every column edge. + You may also consider to set \tabcolsep to a different value... + +* csv-tables do not have a colwidth. + +* Add more classes or options, e.g. for + + + horizontal alignment and rules. + + long table vs. tabular (see next item). + +* Use tabular instead of longtable for tables in legends or generally + inside a float? + + Alternatively, default to tabular and use longtable only if specified + by config setting or class argument (analogue to booktable)? 
+ +* Table heads and footer for longtable (firstpage lastpage ..)? + +* In the right column of the option tables in tools.txt, there should be some more + spacing between the description and the next paragraph "Default:". + +* Paragraph separation in tables is hairy. + see http://www.tex.ac.uk/cgi-bin/texfaq2html?label=struttab + + - The strut solution did not work. + - Setting extrarowheight adds space at the top of a row, not between paragraphs in + a cell. However, I set it to 2pt because the text is too close to the top line. + - baselineskip/stretch does not help. + +* Should there be two hlines after table head and on table end? + +* Place titled tables in a float ('table' environment)? + + The 'table', 'csv-table', and 'list-table' directives support an (optional) + table title. In analogy to the 'figure' directive this should map to a + table float. + +Image and figure directives +``````````````````````````` + +* compare the test case in: + + + `<../../test/functional/input/data/standard.txt>`__ + + `<../../test/functional/expected/standalone_rst_html4css1.html>`__ + + `<../../test/functional/expected/standalone_rst_latex.tex>`__ + +* The default CSS styling for HTML output (plain.css, default.css) lets + text following a right- or left-aligned image float to the side of the + image/figure. + + + Use this default also for LaTeX? + + + Wrap text around figures/images with class argument "wrap" + (like the odt writer)? + + Use `wrapfig` (or other recommended) package. + +* support more graphic formats (especially SVG, the only standard + vector format for HTML) + + +Missing features +---------------- + +* support "figwidth" argument for figures. + + As the 'figwidth' argument is still ignored and the "natural width" of + a figure in LaTeX is 100 % of the text width, setting the 'align' + argument currently has no effect on the LaTeX output. + +* Multiple author entries in docinfo (same thing as in html). 
+ +* Consider supporting the "compact" option and class argument (from + rst2html) as some lists look better compact and others need the space. + +* Better citation support + (see `Footnote & Citation Gathering`_). + +* If ``use-latex-citations`` is used, a bibliography is inserted right at the + end of the document. + + Put in place of the to-be-implemented "citations" directive + (see `Footnote & Citation Gathering`_). + + +Unicode to LaTeX +```````````````` + +The `LyX <http://www.lyx.org>`_ document processor has a comprehensive +Unicode to LaTeX conversion feature with a file called ``unicodesymbols`` +that lists LaTeX counterparts for a wide range of Unicode characters. + +* Use this in the LaTeXTranslator? + Think of copyright issues! + +* The "ucs" package has many translations in ...doc/latex/ucs/config/ + +* The bibstuff_ tool ships a `latex_codec` Python module! + +.. _bibstuff: http://code.google.com/p/bibstuff/ + + +XeTeX writer +```````````` + +* Glyphs missing in the font are left out in the PDF without warning + (e.g. ⇔ left-right double arrow in the functional test output). + +* Disable word-wrap (hyphenation) in literal text locally with + ``providecommand{\nohyphenation}{\addfontfeatures{HyphenChar=None}}``? + + +problematic URLs +```````````````` + +* ^^ LaTeX's special syntax for characters results in "strange" replacements + (both with \href and \url). + + `file with ^^ <../strange^^name>`__: + `<../strange^^name>`__ + +* Unbalanced braces, { or }, will fail (both with \href and \url):: + + `file with { <../strange{name>`__ + `<../strange{name>`__ + +Currently, a warning is written to the error output stream. + +For correct printing, we can + +* use the \href command with "normal" escaped name argument, or +* define a url-command in the preamble :: + + \urldef{\fragileURLi}\nolinkurl{myself%node@gateway.net} + +but need to find a way to insert it as href argument. 
+ +The following fails:: + + \href{https://www.w3.org/XML/Schema^^dev}{\fragileURLi} + +Use %-replacement like http://nowhere/url_with%28parens%29 ? + +-> does not work for file paths (with pdflatex and xpdf). + + +add-stylesheet option +````````````````````` + +From http://article.gmane.org/gmane.text.docutils.devel/3429/ + +The problem is that since we have a default value, we have to +differentiate between adding another stylesheet and replacing the +default. I suggest that the existing --stylesheet & --stylesheet-path +options keep their semantics to replace the existing settings. We +could introduce new --add-stylesheet & --add-stylesheet-path options, +which accumulate; further --stylesheet/--stylesheet-path options would +clear these lists. The stylesheet or stylesheet_path setting (only +one may be set), plus the added_stylesheets and added_stylesheet_paths +settings, describe the combined styles. + +For example, this run will have only one custom stylesheet: + + rstpep2html.py --stylesheet-path custom.css ... + +This run will use the default stylesheet, and the custom one: + + rstpep2html.py --add-stylesheet-path custom.css ... + +This run will use the default stylesheet, a custom local stylesheet, +and an external stylesheet: + + rstpep2html.py --add-stylesheet-path custom.css \ + --add-stylesheet https://www.example.org/external.css ... + +This run will use only the second custom stylesheet: + + rstpep2html.py --add-stylesheet-path custom.css \ + --stylesheet-path second.css ... + + + + +Front-End Tools +=============== + +* Parameterize help text & defaults somehow? Perhaps a callback? Or + initialize ``settings_spec`` in ``__init__`` or ``init_options``? + +* Disable common options that don't apply? + (This should now be easier with ``frontend.filter_settings_spec``.) + +* Add ``--section-numbering`` command line option. The "sectnum" + directive should override the ``--no-section-numbering`` command + line option then. 
+ +* Implement the following suggestions from clig.dev? + + Display output on success, but keep it brief. + provide a --quiet option to suppress all non-essential output. + + Consider chaining several args as input and use --output + (or redirection) for output. + + -- https://clig.dev/#help + +.. _partial parsing: + https://docs.python.org/3/library/argparse.html#partial-parsing + +.. _configuration: ../user/config.html +.. _transforms: ../api/transforms.html + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/website.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/website.txt new file mode 100644 index 00000000..222cc3c8 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/dev/website.txt @@ -0,0 +1,107 @@ +=================== + Docutils Web Site +=================== + +:Author: David Goodger; open to all Docutils developers +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +The Docutils web site, <https://docutils.sourceforge.io/>, is +maintained by the ``docutils-update.local`` script, run by project +maintainers on their local machines. The script +will process any .txt file which is newer than the corresponding .html +file in the local copy of the project's web directory and upload the changes +to the web site at SourceForge. + +.. .. old instructions, for cron job: + + The Docutils web site, <https://docutils.sourceforge.io/>, is + maintained automatically by the ``docutils-update`` script, run as an + hourly cron job on shell.berlios.de (by user "wiemann"). 
The script + will process any .txt file which is newer than the corresponding .html + file in the project's web directory on shell.berlios.de + (``/home/groups/docutils/htdocs/aux/htdocs/``) and upload the changes + to the web site at SourceForge. + +Please **do not** add any generated .html files to the Docutils +repository. They will be generated automatically after a one-time +setup (`described below`__). + +__ `Adding .txt Files`_ + +The docutils-update.local__ script is located at +``sandbox/infrastructure/docutils-update.local``. + +__ https://docutils.sourceforge.io/sandbox/infrastructure/docutils-update.local + +If you want to share files via the web, you can upload them using the +uploaddocutils.sh__ script +(``sandbox/infrastructure/uploaddocutils.sh``). + +__ https://docutils.sourceforge.io/sandbox/infrastructure/uploaddocutils.sh + + +Setting Up +========== + +(TBA) + +.. hint:: + Anyone with checkin privileges can be a web-site maintainer. You need to + set up the directories for a local website build. + + The procedure for that was on the docutils-devel list a while ago. + + +Adding .txt Files +================= + +User/Contributor +---------------- + +When adding a new .txt file that should be converted to HTML: + +#. Edit sandbox/infrastructure/htmlfiles.lst, and add the .html file + corresponding to the new .txt file (please keep the sorted order). + +#. Commit the edited version to the SVN repository. + +Maintainer +---------- + +#. If there are new directories in the SVN, allow the update script to run + once to create the directories in the filesystem before preparing for + HTML processing. + +#. Run the sandbox/infrastructure/update-htmlfiles shell script to generate + .html files:: + + cd <DOCUTILS-ROOT>/docutils/ + sandbox/infrastructure/update-htmlfiles \ + sandbox/infrastructure/htmlfiles.lst + + (Maybe this should become part of docutils-update.local.) + + +Removing Files & Directories +============================ + +#. 
Remove from SVN + +#. Remove to-be-generated HTML files from + ``sandbox/infrastructure/htmlfiles.lst``. + +#. Removing files and directories from SVN will not trigger their removal + from the web site. Files and directories must be manually removed from + sourceforge.net (under ``/home/project-web/docutils/htdocs/``). + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/cmdline-tool.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/cmdline-tool.txt new file mode 100644 index 00000000..5c1da0f2 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/cmdline-tool.txt @@ -0,0 +1,66 @@ +=============================================== + Inside A Docutils Command-Line Front-End Tool +=============================================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +`The Docutils Publisher`_ class was set up to make building +command-line tools easy. All that's required is to choose components +and supply settings for variations. Let's take a look at a typical +command-line front-end tool, ``tools/rst2html.py``, from top to +bottom. + +On Unixish systems, it's best to make the file executable (``chmod +x +file``), and supply an interpreter on the first line, the "shebang" or +"hash-bang" line:: + + #!/usr/bin/env python + +Windows systems can be set up to associate the Python interpreter with +the ``.py`` extension. + +Next are some comments providing metadata:: + + # $Id$ + # Author: David Goodger <goodger@python.org> + # Copyright: This module has been placed in the public domain. + +The module docstring describes the purpose of the tool:: + + """ + A minimal front end to the Docutils Publisher, producing HTML. 
+ """ + +This next block attempts to invoke locale support for +internationalization services, specifically text encoding. It's not +supported on all platforms though, so it's forgiving:: + + try: + import locale + locale.setlocale(locale.LC_ALL, '') + except: + pass + +The real work will be done by the code that's imported here:: + + from docutils.core import publish_cmdline, default_description + +We construct a description of the tool, for command-line help:: + + description = ('Generates (X)HTML documents from standalone ' + 'reStructuredText sources. ' + default_description) + +Now we call the Publisher convenience function, which takes over. +Most of its defaults are used ("standalone" Reader, +"reStructuredText" Parser, etc.). The HTML Writer is chosen by name, +and a description for command-line help is passed in:: + + publish_cmdline(writer_name='html', description=description) + +That's it! `The Docutils Publisher`_ takes care of the rest. + +.. _The Docutils Publisher: ./publisher.html diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/html-stylesheets.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/html-stylesheets.txt new file mode 100644 index 00000000..bf7a46a9 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/html-stylesheets.txt @@ -0,0 +1,94 @@ +============================================== + Writing HTML (CSS) Stylesheets for Docutils_ +============================================== + +:Author: Lea Wiemann +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. _Docutils: https://docutils.sourceforge.io/ + + +The look of Docutils' HTML output is customizable via CSS stylesheets. +The default stylesheets can be found in the +``docutils/writers/html*/`` directories of the ``html4css1`` and +``html-base`` writers in the Docutils installation. 
Use the front-end +command (``rst2html.py`` or ``rst2html5.py``) with the +``--help`` option and look at the description of the ``--stylesheet-path`` +command-line option for the exact machine-specific location. + +To customize the look of HTML documents, you can override the settings +of the default stylesheet in your own stylesheet. Specify both the +default stylesheet and your stylesheet in the ``--stylesheet`` or +``--stylesheet-path`` command line option (or the corresponding +settings in a configuration_ file), e.g. :: + + rst2html.py --stylesheet=html4css1.css,transition-stars.css + +This is the preferable approach if you want to embed the stylesheet(s), as +this ensures that an up-to-date version of ``html4css1.css`` is embedded. + +Alternatively, copy the default style sheet to the place where your +output HTML files will go and place a new file (e.g. called +``my-docutils.css``) in the same directory and use the following +template:: + + /* + :Author: Your Name + :Contact: Your Email Address + :Copyright: This stylesheet has been placed in the public domain. + + Stylesheet for use with Docutils. [Optionally place a more + detailed description here.] + */ + + @import url(html4css1.css); + + /* Your customizations go here. For example: */ + + h1, h2, h3, h4, h5, h6, p.topic-title { + font-family: sans-serif } + +For help on the CSS syntax, see, e.g., the `W3C Specification`_, the +`WDG's guide to Cascading Style Sheets`__, or the `MDN Web Docs`__. + +.. _W3C Specification: https://www.w3.org/Style/CSS/#specs +__ http://www.htmlhelp.com/reference/css/ +__ https://developer.mozilla.org/en-US/docs/Web/CSS + +It is important that you do not edit a copy of ``html4css1.css`` +directly because ``html4css1.css`` is frequently updated with each new +release of Docutils. 
+ +Also make sure that you import ``html4css1.css`` (using "``@import +url(html4css1.css);``") because the definitions contained in the +default stylesheet are required for correct rendering (margins, +alignment, etc.). + +If you think your stylesheet is fancy and you would like to let others +benefit from your efforts, you are encouraged to post the stylesheet to the +Docutils-users_ mailing list. It might find its place in the `stylesheet +collection`_ in the Docutils Sandbox_. + +If you decide to share your stylesheet with other users of Docutils, +please keep website-specific customizations not applicable to +Docutils' HTML code in a separate stylesheet. + +.. base for relative links is /docutils/docs/howto/ + +.. _Docutils-users: ../user/mailing-lists.html#docutils-users +.. _configuration: ../user/config.txt +.. _sandbox: ../../../sandbox +.. _stylesheet collection: ../../../sandbox/stylesheets/ + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/i18n.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/i18n.txt new file mode 100644 index 00000000..85f99c95 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/i18n.txt @@ -0,0 +1,178 @@ +================================ + Docutils_ Internationalization +================================ + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + + +.. contents:: + + +This document describes the internationalization facilities of the +Docutils_ project. `Introduction to i18n`_ by Tomohiro KUBOTA is a +good general reference. "Internationalization" is often abbreviated +as "i18n": "i" + 18 letters + "n". + +.. Note:: + + The i18n facilities of Docutils should be considered a "first + draft". 
They work so far, but improvements are welcome. + Specifically, standard i18n facilities like "gettext" have yet to + be explored. + +Docutils is designed to work flexibly with text in multiple languages +(one language at a time). Language-specific features are (or should +be [#]_) fully parameterized. To enable a new language, two modules +have to be added to the project: one for Docutils itself (the +`Docutils Language Module`_) and one for the reStructuredText parser +(the `reStructuredText Language Module`_). Users may add local language +support via a module in the PYTHONPATH root (e.g. the working directory). + +.. [#] If anything in Docutils is insufficiently parameterized, it + should be considered a bug. Please report bugs to the Docutils + project bug tracker on SourceForge at + https://sourceforge.net/p/docutils/bugs/ + +.. _Docutils: https://docutils.sourceforge.io/ +.. _Introduction to i18n: + http://www.debian.org/doc/manuals/intro-i18n/ + + +Language Module Names +===================== + +Language modules are named using `language tags`_ as defined in +`BCP 47`_ [#]_, in lowercase, converting hyphens to underscores [#]_. + +A typical language identifier consists of a 2-letter language code +from `ISO 639`_ (3-letter codes can be used if no 2-letter code +exists). The language identifier can have an optional subtag, +typically for variations based on country (from `ISO 3166`_ 2-letter +country codes). If no language identifier is specified, the default +is "en" for English. Examples of module names include ``en.py``, +``fr.py``, ``ja.py``, and ``pt_br.py``. + +.. [#] BCP stands for 'Best Current Practice', and is a persistent + name for a series of RFCs whose numbers change as they are updated. + The latest RFC describing language tag syntax is RFC 5646, Tags for + the Identification of Languages, and it obsoletes the older RFCs + 4646, 3066 and 1766. + +.. 
[#] Subtags are separated from primary tags by underscores instead + of hyphens, to conform to Python naming rules. + +.. _language tags: https://www.w3.org/International/articles/language-tags/ +.. _BCP 47: https://www.rfc-editor.org/rfc/bcp/bcp47.txt +.. _ISO 639: http://www.loc.gov/standards/iso639-2/php/English_list.php +.. _ISO 3166: http://www.iso.ch/iso/en/prods-services/iso3166ma/ + 02iso-3166-code-lists/index.html + + +Docutils Language Module +======================== + +Modules in ``docutils/languages`` contain language mappings for +markup-independent language-specific features of Docutils. To make a +new language module, just copy the ``en.py`` file, rename it with the +code for your language (see `Language Module Names`_ above), and +translate the terms as described below. + +Each Docutils language module contains three module attributes: + +``labels`` + This is a mapping of node class names to language-dependent + boilerplate label text. The label text is used by Writer + components when they encounter document tree elements whose class + names are the mapping keys. + + The entry values (*not* the keys) should be translated to the + target language. + +``bibliographic_fields`` + This is a mapping of language-dependent field names (converted to + lower case) to canonical field names (keys of + ``DocInfo.biblio_notes`` in ``docutils.transforms.frontmatter``). + It is used when transforming bibliographic fields. + + The keys should be translated to the target language. + +``author_separators`` + This is a list of strings used to parse the 'Authors' + bibliographic field. They separate individual authors' names, and + are tried in order (i.e., earlier items take priority, and the + first item that matches wins). The English-language module + defines them as ``[';', ',']``; semi-colons can be used to + separate names like "Arthur Pewtie, Esq.". + + Most languages won't have to "translate" this list. 
+ + +reStructuredText Language Module +================================ + +Modules in ``docutils/parsers/rst/languages`` contain language +mappings for language-specific features of the reStructuredText +parser. To make a new language module, just copy the ``en.py`` file, +rename it with the code for your language (see `Language Module +Names`_ above), and translate the terms as described below. + +Each reStructuredText language module contains two module attributes: + +``directives`` + This is a mapping from language-dependent directive names to + canonical directive names. The canonical directive names are + registered in ``docutils/parsers/rst/directives/__init__.py``, in + ``_directive_registry``. + + The keys should be translated to the target language. Synonyms + (multiple keys with the same values) are allowed; this is useful + for abbreviations. + +``roles`` + This is a mapping from language-dependent role names to canonical role + names for interpreted text. The canonical role names are + registered in ``docutils/parsers/rst/states.py``, in + ``Inliner._interpreted_roles`` (this may change). + + The keys should be translated to the target language. Synonyms + (multiple keys with the same values) are allowed; this is useful + for abbreviations. + + +Testing the Language Modules +============================ + +Whenever a new language module is added or an existing one modified, +the unit tests should be run. The test modules can be found in the +docutils/test directory from code_ or from the `latest snapshot`_. + +The ``test_language.py`` module can be run as a script. With no +arguments, it will test all language modules. With one or more +language codes, it will test just those languages. For example:: + + $ python test_language.py en + .. + ---------------------------------------- + Ran 2 tests in 0.095s + + OK + +Use the "alltests.py" script to run all test modules, exhaustively +testing the parser and other parts of the Docutils system. + +.. 
_code: https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/ +.. _latest snapshot: https://sourceforge.net/p/docutils/code/HEAD/tarball + + +Submitting the Language Modules +=============================== + +If you do not have repository write access and want to contribute your +language modules, feel free to submit them via the `SourceForge patch +tracker`__. + +__ https://sourceforge.net/p/docutils/patches/ diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-directives.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-directives.txt new file mode 100644 index 00000000..9e23f80f --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-directives.txt @@ -0,0 +1,430 @@ +======================================= + Creating reStructuredText_ Directives +======================================= + +:Authors: Dethe Elza, David Goodger, Lea Wiemann +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. _reStructuredText: https://docutils.sourceforge.io/rst.html + + +Directives are the primary extension mechanism of reStructuredText. +This document aims to make the creation of new directives as easy and +understandable as possible. There are only a couple of +reStructuredText-specific features the developer needs to know to +create a basic directive. + +The syntax of directives is detailed in the `reStructuredText Markup +Specification`_, and standard directives are described in +`reStructuredText Directives`_. + +Directives are a reStructuredText markup/parser concept. There is no +"directive" document tree element, no single element that corresponds +exactly to the concept of directives. Instead, choose the most +appropriate elements from the existing Docutils elements. Directives +build structures using the existing building blocks. 
See `The +Docutils Document Tree`_ and the ``docutils.nodes`` module for more +about the building blocks of Docutils documents. + +.. _reStructuredText Markup Specification: + ../ref/rst/restructuredtext.html#directives +.. _reStructuredText Directives: ../ref/rst/directives.html +.. _The Docutils Document Tree: ../ref/doctree.html + + +.. contents:: Table of Contents + + +The Directive Class +=================== + +Directives are created by defining a directive class that inherits +from ``docutils.parsers.rst.Directive``:: + + from docutils.parsers import rst + + class MyDirective(rst.Directive): + + ... + +To understand how to implement the directive, let's have a look at the +docstring of the ``Directive`` base class:: + + >>> from docutils.parsers import rst + >>> print rst.Directive.__doc__ + + Base class for reStructuredText directives. + + The following attributes may be set by subclasses. They are + interpreted by the directive parser (which runs the directive + class): + + - `required_arguments`: The number of required arguments (default: + 0). + + - `optional_arguments`: The number of optional arguments (default: + 0). + + - `final_argument_whitespace`: A boolean, indicating if the final + argument may contain whitespace (default: False). + + - `option_spec`: A dictionary, mapping known option names to + conversion functions such as `int` or `float` (default: {}, no + options). Several conversion functions are defined in the + directives/__init__.py module. + + Option conversion functions take a single parameter, the option + argument (a string or ``None``), validate it and/or convert it + to the appropriate form. Conversion functions may raise + `ValueError` and `TypeError` exceptions. + + - `has_content`: A boolean; True if content is allowed. Client + code must handle the case where content is required but not + supplied (an empty content list will be supplied). + + Arguments are normally single whitespace-separated words. 
The + final argument may contain whitespace and/or newlines if + `final_argument_whitespace` is True. + + If the form of the arguments is more complex, specify only one + argument (either required or optional) and set + `final_argument_whitespace` to True; the client code must do any + context-sensitive parsing. + + When a directive implementation is being run, the directive class + is instantiated, and the `run()` method is executed. During + instantiation, the following instance variables are set: + + - ``name`` is the directive type or name (string). + + - ``arguments`` is the list of positional arguments (strings). + + - ``options`` is a dictionary mapping option names (strings) to + values (type depends on option conversion functions; see + `option_spec` above). + + - ``content`` is a list of strings, the directive content line by line. + + - ``lineno`` is the line number of the first line of the directive. + + - ``content_offset`` is the line offset of the first line of the content from + the beginning of the current input. Used when initiating a nested parse. + + - ``block_text`` is a string containing the entire directive. + + - ``state`` is the state which called the directive function. + + - ``state_machine`` is the state machine which controls the state which called + the directive function. + + Directive functions return a list of nodes which will be inserted + into the document tree at the point where the directive was + encountered. This can be an empty list if there is nothing to + insert. + + For ordinary directives, the list must contain body elements or + structural elements. Some directives are intended specifically + for substitution definitions, and must return a list of `Text` + nodes and/or inline elements (suitable for inline insertion, in + place of the substitution reference). 
Such directives must verify + substitution definition context, typically using code like this:: + + if not isinstance(state, states.SubstitutionDef): + error = state_machine.reporter.error( + 'Invalid context: the "%s" directive can only be used ' + 'within a substitution definition.' % (name), + nodes.literal_block(block_text, block_text), line=lineno) + return [error] + + >>> + + +Option Conversion Functions +=========================== + +An option specification (``Directive.option_spec``) must be defined +detailing the options available to the directive. An option spec is a +mapping of option name to conversion function; conversion functions +are applied to each option value to check validity and convert them to +the expected type. Python's built-in conversion functions are often +usable for this, such as ``int``, ``float``. Other useful conversion +functions are included in the ``docutils.parsers.rst.directives`` +package (in the ``__init__.py`` module): + +- ``flag``: For options with no option arguments. Checks for an + argument (raises ``ValueError`` if found), returns ``None`` for + valid flag options. + +- ``unchanged_required``: Returns the text argument, unchanged. + Raises ``ValueError`` if no argument is found. + +- ``unchanged``: Returns the text argument, unchanged. Returns an + empty string ("") if no argument is found. + +- ``path``: Returns the path argument unwrapped (with newlines + removed). Raises ``ValueError`` if no argument is found. + +- ``uri``: Returns the URI argument with whitespace removed. Raises + ``ValueError`` if no argument is found. + +- ``nonnegative_int``: Checks for a nonnegative integer argument, + and raises ``ValueError`` if not. + +- ``class_option``: Converts the argument into an ID-compatible + string and returns it. Raises ``ValueError`` if no argument is + found. + +- ``unicode_code``: Convert a Unicode character code to a Unicode + character. + +- ``single_char_or_unicode``: A single character is returned as-is. 
+ Unicode character codes are converted as in ``unicode_code``. + +- ``single_char_or_whitespace_or_unicode``: As with + ``single_char_or_unicode``, but "tab" and "space" are also + supported. + +- ``positive_int``: Converts the argument into an integer. Raises + ValueError for negative, zero, or non-integer values. + +- ``positive_int_list``: Converts a space- or comma-separated list + of integers into a Python list of integers. Raises ValueError for + non-positive-integer values. + +- ``encoding``: Verifies the encoding argument by lookup. Raises + ValueError for unknown encodings. + +A further utility function, ``choice``, is supplied to enable +options whose argument must be a member of a finite set of possible +values. A custom conversion function must be written to use it. +For example:: + + from docutils.parsers.rst import directives + + def yesno(argument): + return directives.choice(argument, ('yes', 'no')) + +For example, here is an option spec for a directive which allows two +options, "name" and "value", each with an option argument:: + + option_spec = {'name': unchanged, 'value': int} + + +Error Handling +============== + +If your directive implementation encounters an error during +processing, you should raise the exception created by ``self.error()`` +inside the ``run()`` method:: + + if error_condition: + raise self.error('Error message.') + +The ``self.error()`` method returns an exception which, when raised, +will be caught by the reStructuredText directive handler. The +directive handler will then insert an error-level system message in +the document at the place where the directive occurred. + +Instead of ``self.error``, you can also use ``self.severe`` and +``self.warning`` for more or less severe problems. + +If you want to return a system message *and* document contents, you need to +create the system message yourself instead of using the ``self.error`` +convenience method:: + + def run(self): + # Create node(s). + node = nodes.paragraph(...) + # Node list to return. 
+ node_list = [node] + if error_condition: + # Create system message. + error = self.reporter.error( + 'Error in "%s" directive: Your error message.' % self.name, + nodes.literal_block(block_text, block_text), line=lineno) + node_list.append(error) + return node_list + + +Register the Directive +====================== + +* If the directive is a general-use **addition to the Docutils core**, + it must be registered with the parser and language mappings added: + + 1. Register the new directive using its canonical name in + ``docutils/parsers/rst/directives/__init__.py``, in the + ``_directive_registry`` dictionary. This allows the + reStructuredText parser to find and use the directive. + + 2. Add an entry to the ``directives`` dictionary in + ``docutils/parsers/rst/languages/en.py`` for the directive, mapping + the English name to the canonical name (both lowercase). Usually + the English name and the canonical name are the same. + + 3. Update all the other language modules as well. For languages in + which you are proficient, please add translations. For other + languages, add the English directive name plus "(translation + required)". + +* If the directive is **application-specific**, use the + ``register_directive`` function:: + + from docutils.parsers.rst import directives + directives.register_directive(directive_name, directive_class) + + +Examples +======== + +For the most direct and accurate information, "Use the Source, Luke!". +All standard directives are documented in `reStructuredText +Directives`_, and the source code implementing them is located in the +``docutils/parsers/rst/directives`` package. The ``__init__.py`` +module contains a mapping of directive name to module and function +name. Several representative directives are described below. + + +Admonitions +----------- + +`Admonition directives`__, such as "note" and "caution", are quite +simple. They have no directive arguments or options. 
Admonition +directive content is interpreted as ordinary reStructuredText. + +__ ../ref/rst/directives.html#specific-admonitions + +The resulting document tree for a simple reStructuredText line +"``.. note:: This is a note.``" looks as follows:: + + <note> + <paragraph> + This is a note. + +The directive class for the "note" directive simply derives from a +generic admonition directive class:: + + class Note(BaseAdmonition): + + node_class = nodes.note + +Note that the only thing distinguishing the various admonition +directives is the element (node class) generated. In the code above, +the node class is set as a class attribute and is read by the +``run()`` method of ``BaseAdmonition``, where the actual processing +takes place:: + + # Import Docutils document tree nodes module. + from docutils import nodes + # Import Directive base class. + from docutils.parsers.rst import Directive + + class BaseAdmonition(Directive): + + required_arguments = 0 + optional_arguments = 0 + final_argument_whitespace = True + option_spec = {} + has_content = True + + node_class = None + """Subclasses must set this to the appropriate admonition node class.""" + + def run(self): + # Raise an error if the directive does not have contents. + self.assert_has_content() + text = '\n'.join(self.content) + # Create the admonition node, to be populated by `nested_parse`. + admonition_node = self.node_class(rawsource=text) + # Parse the directive contents. + self.state.nested_parse(self.content, self.content_offset, + admonition_node) + return [admonition_node] + +Three things are noteworthy in the ``run()`` method above: + +* The ``admonition_node = self.node_class(rawsource=text)`` line creates the + wrapper element, using the class set by the specific admonition + subclasses (as in note, ``node_class = nodes.note``). + +* The call to ``state.nested_parse()`` is what does the actual + processing. It parses the directive content and adds any generated + elements as child elements of ``admonition_node``.
+ +* If there was no directive content, the ``assert_has_content()`` + convenience method raises an error exception by calling + ``self.error()`` (see `Error Handling`_ above). + + +"image" +------- + +.. _image: ../ref/rst/directives.html#image + +The "image_" directive is used to insert a picture into a document. +This directive has one argument, the path to the image file, and +supports several options. There is no directive content. Here's an +early version of the image directive class:: + + # Import Docutils document tree nodes module. + from docutils import nodes + # Import ``directives`` module (contains conversion functions). + from docutils.parsers.rst import directives + # Import Directive base class. + from docutils.parsers.rst import Directive + + def align(argument): + """Conversion function for the "align" option.""" + return directives.choice(argument, ('left', 'center', 'right')) + + class Image(Directive): + + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + option_spec = {'alt': directives.unchanged, + 'height': directives.nonnegative_int, + 'width': directives.nonnegative_int, + 'scale': directives.nonnegative_int, + 'align': align, + } + has_content = False + + def run(self): + reference = directives.uri(self.arguments[0]) + self.options['uri'] = reference + image_node = nodes.image(rawsource=self.block_text, + **self.options) + return [image_node] + +Several things are noteworthy in the code above: + +* The "image" directive requires a single argument, which is allowed + to contain whitespace (``final_argument_whitespace = True``). This + is to allow for long URLs which may span multiple lines. The first + line of the ``run()`` method joins the URL, discarding any embedded + whitespace. + +* The reference is added to the ``options`` dictionary under the + "uri" key; this becomes an attribute of the ``nodes.image`` element + object. 
Any other attributes have already been set explicitly in + the reStructuredText source text. + + +The Pending Element +------------------- + +Directives that cause actions to be performed *after* the complete +document tree has been generated can be implemented using a +``pending`` node. The ``pending`` node causes a transform_ to be run +after the document has been parsed. + +For an example usage of the ``pending`` node, see the implementation +of the ``contents`` directive in +docutils.parsers.rst.directives.parts__. + +.. _transform: ../api/transforms.html +__ https://docutils.sourceforge.io/docutils/parsers/rst/directives/parts.py diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-roles.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-roles.txt new file mode 100644 index 00000000..7bfcc3de --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/rst-roles.txt @@ -0,0 +1,235 @@ +================================================== + Creating reStructuredText Interpreted Text Roles +================================================== + +:Authors: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +Interpreted text roles are an extension mechanism for inline markup in +reStructuredText. This document aims to make the creation of new +roles as easy and understandable as possible. + +Standard roles are described in `reStructuredText Interpreted Text +Roles`_. See the `Interpreted Text`_ section in the `reStructuredText +Markup Specification`_ for syntax details. + +.. _reStructuredText Interpreted Text Roles: ../ref/rst/roles.html +.. _Interpreted Text: + ../ref/rst/restructuredtext.html#interpreted-text +.. _reStructuredText Markup Specification: + ../ref/rst/restructuredtext.html + + +.. 
contents:: + + +Define the Role Function +======================== + +The role function creates and returns inline elements (nodes) and does +any additional processing required. Its signature is as follows:: + + def role_fn(name, rawtext, text, lineno, inliner, + options=None, content=None): + code... + + # Optional function attributes for customization: + role_fn.options = ... + role_fn.content = ... + +Function attributes are described below (see `Specify Role Function +Options and Content`_). The role function parameters are as follows: + +* ``name``: The local name of the interpreted role, the role name + actually used in the document. + +* ``rawtext``: A string containing the entire interpreted text input, + including the role and markup. Return it as a ``problematic`` node + linked to a system message if a problem is encountered. + +* ``text``: The interpreted text content. + +* ``lineno``: The line number where the text block containing the + interpreted text begins. + +* ``inliner``: The ``docutils.parsers.rst.states.Inliner`` object that + called role_fn. It contains several attributes useful for error + reporting and document tree access. + +* ``options``: A dictionary of directive options for customization + (from the `"role" directive`_), to be interpreted by the role + function. Used for additional attributes for the generated elements + and other functionality. + +* ``content``: A list of strings, the directive content for + customization (from the `"role" directive`_). To be interpreted by + the role function. + +Role functions return a tuple of two values: + +* A list of nodes which will be inserted into the document tree at the + point where the interpreted role was encountered (can be an empty + list). + +* A list of system messages, which will be inserted into the document tree + immediately after the end of the current block (can also be empty).
+ + +Specify Role Function Options and Content +========================================= + +Function attributes are for customization, and are interpreted by the +`"role" directive`_. If unspecified, role function attributes are +assumed to have the value ``None``. Two function attributes are +recognized: + +- ``options``: The option specification. All role functions + implicitly support the "class" option, unless disabled with an + explicit ``{'class': None}``. + + An option specification must be defined detailing the options + available to the "role" directive. An option spec is a mapping of + option name to conversion function; conversion functions are applied + to each option value to check validity and convert them to the + expected type. Python's built-in conversion functions are often + usable for this, such as ``int``, ``float``, and ``bool`` (included + in Python from version 2.2.1). Other useful conversion functions + are included in the ``docutils.parsers.rst.directives`` package. + For further details, see `Creating reStructuredText Directives`_. + +- ``content``: A boolean; true if "role" directive content is allowed. + Role functions must handle the case where content is required but + not supplied (an empty content list will be supplied). + + As of this writing, no roles accept directive content. + +Note that unlike directives, the "arguments" function attribute is not +supported for role customization. Directive arguments are handled by +the "role" directive itself. + +.. _"role" directive: ../ref/rst/directives.html#role +.. _Creating reStructuredText Directives: + rst-directives.html#specify-directive-arguments-options-and-content + + +Register the Role +================= + +If the role is a general-use addition to the Docutils core, it must be +registered with the parser and language mappings added: + +1. 
Register the new role using the canonical name:: + + from docutils.parsers.rst import roles + roles.register_canonical_role(name, role_function) + + This code is normally placed immediately after the definition of + the role function. + +2. Add an entry to the ``roles`` dictionary in + ``docutils/parsers/rst/languages/en.py`` for the role, mapping the + English name to the canonical name (both lowercase). Usually the + English name and the canonical name are the same. Abbreviations + and other aliases may also be added here. + +3. Update all the other language modules as well. For languages in + which you are proficient, please add translations. For other + languages, add the English role name plus "(translation required)". + +If the role is application-specific, use the ``register_local_role`` +function:: + + from docutils.parsers.rst import roles + roles.register_local_role(name, role_function) + + +Examples +======== + +For the most direct and accurate information, "Use the Source, Luke!". +All standard roles are documented in `reStructuredText Interpreted +Text Roles`_, and the source code implementing them is located in the +``docutils/parsers/rst/roles.py`` module. Several representative +roles are described below. + + +Generic Roles +------------- + +Many roles simply wrap a given element around the text. There's a +special helper function, ``register_generic_role``, which generates a +role function from the canonical role name and node class:: + + register_generic_role('emphasis', nodes.emphasis) + +For the implementation of ``register_generic_role``, see the +``docutils.parsers.rst.roles`` module. + + +RFC Reference Role +------------------ + +This role allows easy references to RFCs_ (Request For Comments +documents) by automatically providing the base URL, +http://www.faqs.org/rfcs/, and appending the RFC document itself +(rfcXXXX.html, where XXXX is the RFC number). For example:: + + See :RFC:`2822` for information about email headers. 
+ +This is equivalent to:: + + See `RFC 2822`__ for information about email headers. + + __ http://www.faqs.org/rfcs/rfc2822.html + +Here is the implementation of the role:: + + def rfc_reference_role(role, rawtext, text, lineno, inliner, + options=None, content=None): + if "#" in text: + rfcnum, section = utils.unescape(text).split("#", 1) + else: + rfcnum, section = utils.unescape(text), None + try: + rfcnum = int(rfcnum) + if rfcnum < 1: + raise ValueError + except ValueError: + msg = inliner.reporter.error( + 'RFC number must be a number greater than or equal to 1; ' + '"%s" is invalid.' % text, line=lineno) + prb = inliner.problematic(rawtext, rawtext, msg) + return [prb], [msg] + # Base URL mainly used by inliner.rfc_reference, so this is correct: + ref = inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum + if section is not None: + ref += "#"+section + options = normalize_role_options(options) + node = nodes.reference(rawtext, 'RFC ' + str(rfcnum), refuri=ref, + **options) + return [node], [] + + register_canonical_role('rfc-reference', rfc_reference_role) + +Noteworthy in the code above are: + +1. The interpreted text itself should contain the RFC number. The + ``try`` clause verifies by converting it to an integer. If the + conversion fails, the ``except`` clause is executed: a system + message is generated, the entire interpreted text construct (in + ``rawtext``) is wrapped in a ``problematic`` node (linked to the + system message), and the two are returned. + +2. The RFC reference itself is constructed from a stock URI, set as + the "refuri" attribute of a "reference" element. + +3. The ``options`` function parameter, a dictionary, may contain a + "class" customization attribute; it is interpreted and replaced + with a "classes" attribute by the ``normalize_role_options()`` function. The + resulting "classes" attribute is passed through to the "reference" + element node constructor. + +..
_RFCs: http://foldoc.doc.ic.ac.uk/foldoc/foldoc.cgi?query=rfc&action=Search&sourceid=Mozilla-search diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/security.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/security.txt new file mode 100644 index 00000000..b087359a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/howto/security.txt @@ -0,0 +1,206 @@ +============================= + Deploying Docutils Securely +============================= + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + +Introduction +============ + +Initially, Docutils was intended for command-line tools and +single-user applications. Through-the-web editing and processing was +not envisaged, therefore web security was not a consideration. Once +Docutils/reStructuredText started being incorporated into an +ever-increasing number of web applications (blogs__, wikis__, content +management systems, and others), several security issues arose and +have been addressed. Still, **Docutils does not come in a +through-the-web secure state**, because this would inconvenience +ordinary users. This document provides pointers to help you secure +the Docutils software in your applications. + +__ ../../FAQ.html#are-there-any-weblog-blog-projects-that-use-restructuredtext-syntax +__ ../../FAQ.html#are-there-any-wikis-that-use-restructuredtext-syntax + + +The Issues +========== + +File Creation +------------- + +Docutils does not do any checks before writing to a file: + +* Existing **files are overwritten** without asking! +* Files may be **written to any location** accessible to the process. +* There are **no restrictions to** the **file names**. 
+ +Special care must be taken when allowing users to configure the *output +destination* or the `warning_stream`_, `record_dependencies`_, or +`_destination`_ settings. + +.. _warning_stream: ../user/config.html#warning-stream +.. _record_dependencies: ../user/config.html#record-dependencies +.. _`_destination`: ../user/config.html#destination + + +External Data Insertion +----------------------- + +There are several `reStructuredText directives`_ that can insert +external data (files and URLs) into the output document. These +directives are: + +* "include_", by its very nature, +* "raw_", through its ``:file:`` and ``:url:`` options, +* "csv-table_", through its ``:file:`` and ``:url:`` options, +* "image_", if `embed_images`_ is true. + +The "include_" directive and the other directives' file insertion +features can be disabled by setting "file_insertion_enabled_" to +"false__". + +__ ../user/config.html#configuration-file-syntax +.. _reStructuredText directives: ../ref/rst/directives.html +.. _include: ../ref/rst/directives.html#include +.. _raw: ../ref/rst/directives.html#raw-directive +.. _csv-table: ../ref/rst/directives.html#csv-table +.. _image: ../ref/rst/directives.html#image +.. _embed_images: ../user/config.html#embed-images +.. _file_insertion_enabled: ../user/config.html#file-insertion-enabled + + +Raw HTML Insertion +------------------ + +The "raw_" directive is intended for the insertion of +non-reStructuredText data that is passed untouched to the Writer. +This directive can be abused to bypass site features or insert +malicious JavaScript code into a web page. The "raw_" directive can +be disabled by setting "raw_enabled_" to "false". + +.. _raw_enabled: ../user/config.html#raw-enabled + + +CPU and memory utilization +-------------------------- + +Parsing **complex reStructuredText documents may require high +processing resources**. This enables `Denial of Service` attacks using +specially crafted input. 
+ +It is recommended to enforce limits for the computation time and +resource utilization of the Docutils process when processing +untrusted input. In addition, the "line_length_limit_" can be +adapted. + +.. _line_length_limit: ../user/config.html#line-length-limit + + +Securing Docutils +================= + +Programmatically Via Application Default Settings +------------------------------------------------- + +If your application calls Docutils via one of the `convenience +functions`_, you can pass a dictionary of default settings that +override the component defaults:: + + defaults = {'file_insertion_enabled': False, + 'raw_enabled': False} + output = docutils.core.publish_string( + ..., settings_overrides=defaults) + +Note that these defaults can be overridden by configuration files (and +command-line options if applicable). If this is not desired, you can +disable configuration file processing with the ``_disable_config`` +setting:: + + defaults = {'file_insertion_enabled': False, + 'raw_enabled': False, + '_disable_config': True} + output = docutils.core.publish_string( + ..., settings_overrides=defaults) + +.. _convenience functions: ../api/publisher.html + + +Via a Configuration File +------------------------ + +You may secure Docutils via a configuration file: + +* if your application executes one of the `Docutils front-end tools`_ + as a separate process; +* if you cannot or choose not to alter the source code of your + application or the component that calls Docutils; or +* if you want to secure all Docutils deployments system-wide. + +If you call Docutils programmatically, it may be preferable to use the +methods described in the section above. 
+ +Docutils automatically looks in three places for a configuration file: + +* ``/etc/docutils.conf``, for system-wide configuration, +* ``./docutils.conf`` (in the current working directory), for + project-specific configuration, and +* ``~/.docutils`` (in the user's home directory), for user-specific + configuration. + +These locations can be overridden by the ``DOCUTILSCONFIG`` +environment variable. Details about configuration files, the purpose +of the various locations, and ``DOCUTILSCONFIG`` are available in the +`"Configuration Files"`_ section of `Docutils Configuration`_. + +To fully secure a recent Docutils installation, the configuration file +should contain the following lines :: + + [general] + file-insertion-enabled: off + raw-enabled: no + +and untrusted users must be prevented from modifying or creating local +configuration files that overwrite these settings. + +.. _Docutils front-end tools: ../user/tools.html +.. _"Configuration Files": ../user/config.html#configuration-files +.. _Docutils Configuration: ../user/config.html + + +Version Applicability +===================== + +The "file_insertion_enabled_" and "raw_enabled_" settings were added +to Docutils 0.3.9; previous versions will ignore these settings. + +A bug existed in the configuration file handling of these settings in +Docutils 0.4 and earlier: the right-hand-side needed to be left blank +(no values):: + + [general] + file-insertion-enabled: + raw-enabled: + +The bug was fixed with the 0.4.1 release on 2006-11-12. + +The "line_length_limit_" is new in Docutils 0.17. + + +Related Documents +================= + +`Docutils Runtime Settings`_ explains the relationship between +component settings specifications, application settings +specifications, configuration files, and command-line options. + +`Docutils Configuration`_ describes configuration files (locations, +structure, and syntax), and lists all settings and command-line +options. + +..
_Docutils Runtime Settings: ../api/runtime-settings.html diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/index.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/index.txt new file mode 100644 index 00000000..1d83f6f4 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/index.txt @@ -0,0 +1,239 @@ +========================================== + Docutils Project Documentation Overview +========================================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Date: $Date$ +:Revision: $Revision$ +:Copyright: This document has been placed in the public domain. + +The latest working documents may be accessed individually below, or +from the ``docs`` directory of the `Docutils distribution`_. + +.. _Docutils: https://docutils.sourceforge.io/ +.. _Docutils distribution: https://docutils.sourceforge.io/#download + +.. header:: + Docutils_ | Overview | About__ | Users__ | Reference__ | Developers__ + +__ `project fundamentals`_ +__ user_ +__ ref_ +__ howto_ + + +.. contents:: + + +Docutils Stakeholders +===================== + +Docutils stakeholders can be categorized in several groups: + +1. End-users: users of reStructuredText and the Docutils tools. + Although some are developers (e.g. Python developers utilizing + reStructuredText for docstrings in their source), many are not. + +2. Client-developers: developers using Docutils as a library, + programmers developing *with* Docutils. + +3. Component-developers: those who implement application-specific + components, directives, and/or roles, separately from Docutils. + +4. Core-developers: developers of the Docutils codebase and + participants in the Docutils project community. + +5. Re-implementers: developers of alternate implementations of + Docutils. + +There's a lot of overlap between these groups. 
Most (perhaps all) +core-developers, component-developers, client-developers, and +re-implementers are also end-users. Core-developers are also +client-developers, and may also be component-developers in other +projects. Component-developers are also client-developers. + + +Project Fundamentals +==================== + +These files are for all Docutils stakeholders. They are kept at the +top level of the Docutils project directory. + +.. class:: narrow run-in + +:README_: Project overview: quick-start, requirements, + installation, and usage. +:COPYING_: Conditions for Docutils redistribution, with links to + licenses. +:FAQ_: Docutils Frequently Asked Questions. If you have a + question or issue, there's a good chance it's already + answered here. +:BUGS_: A list of known bugs, and how to report a bug. +:RELEASE-NOTES_: Summary of the major changes in recent releases and + notice of future incompatible changes. +:HISTORY_: Detailed change history log. +:THANKS_: Acknowledgements. + +.. _README: ../README.html +.. _BUGS: ../BUGS.html +.. _COPYING: ../COPYING.html +.. _Docutils FAQ: +.. _FAQ: ../FAQ.html +.. _RELEASE-NOTES: ../RELEASE-NOTES.html +.. _HISTORY: ../HISTORY.html +.. _THANKS: ../THANKS.html + + +.. 
_user: + +Introductory & Tutorial Material for End-Users +============================================== + +Docutils-general: + * `Docutils Front-End Tools <user/tools.html>`__ + * `Docutils Configuration <user/config.html>`__ + * `Docutils Mailing Lists <user/mailing-lists.html>`__ + * `Docutils Link List <user/links.html>`__ + +Writer-specific: + * `Docutils HTML Writers <user/html.html>`__ + * `Easy Slide Shows With reStructuredText & S5 <user/slide-shows.html>`__ + * `Docutils LaTeX Writer <user/latex.html>`__ + * `Man Page Writer for Docutils <user/manpage.html>`__ + * `Docutils ODF/OpenOffice/odt Writer <user/odt.html>`__ + +`reStructuredText <https://docutils.sourceforge.io/rst.html>`_: + * `A ReStructuredText Primer <user/rst/quickstart.html>`__ + (see also the `text source <user/rst/quickstart.txt>`__) + * `Quick reStructuredText <user/rst/quickref.html>`__ (user reference) + * `reStructuredText Cheat Sheet <user/rst/cheatsheet.txt>`__ (text + only; 1 page for syntax, 1 page directive & role reference) + * `Demonstration <user/rst/demo.html>`_ + of most reStructuredText features + (see also the `text source <user/rst/demo.txt>`__) + +Editor support: + * `Emacs support for reStructuredText <user/emacs.html>`_ + + +.. _ref: + +Reference Material for All Groups +================================= + +Many of these files began as developer specifications, but now that +they're mature and used by end-users and client-developers, they have +become reference material. Successful specs evolve into refs. 
+ +Docutils-general: + * `The Docutils Document Tree <ref/doctree.html>`__ (incomplete) + * `Docutils Generic DTD <ref/docutils.dtd>`__ + * `OASIS XML Exchange Table Model Declaration Module + <ref/soextblx.dtd>`__ (CALS tables DTD module) + * `Docutils Design Specification`_ (PEP 258) + +reStructuredText_: + * `An Introduction to reStructuredText <ref/rst/introduction.html>`__ + (includes the `Goals <ref/rst/introduction.html#goals>`__ and + `History <ref/rst/introduction.html#history>`__ of reStructuredText) + * `reStructuredText Markup Specification <ref/rst/restructuredtext.html>`__ + * `reStructuredText Directives <ref/rst/directives.html>`__ + * `reStructuredText Interpreted Text Roles <ref/rst/roles.html>`__ + * `reStructuredText Standard Definition Files + <ref/rst/definitions.html>`_ + * `LaTeX syntax for mathematics <ref/rst/mathematics.html>`__ + (syntax used in "math" directive and role) + +.. _peps: + +Python Enhancement Proposals + * `PEP 256: Docstring Processing System Framework`__ is a high-level + generic proposal. [:PEP:`256` in the `master repository`_] + * `PEP 257: Docstring Conventions`__ addresses docstring style and + touches on content. [:PEP:`257` in the `master repository`_] + * `PEP 258: Docutils Design Specification`__ is an overview of the + architecture of Docutils. It documents design issues and + implementation details. [:PEP:`258` in the `master repository`_] + * `PEP 287: reStructuredText Docstring Format`__ proposes a standard + markup syntax. [:PEP:`287` in the `master repository`_] + + Please note that PEPs in the `master repository`_ developed + independent from the local versions after submission. + + __ peps/pep-0256.html + __ peps/pep-0257.html + .. _PEP 258: + .. _Docutils Design Specification: + __ peps/pep-0258.html + __ peps/pep-0287.html + .. _master repository: https://peps.python.org + +Prehistoric: + `Setext Documents Mirror`__ + + __ https://docutils.sourceforge.io/mirror/setext.html + + +.. 
_api: + +API Reference Material for Client-Developers +============================================ + +* `The Docutils Publisher <api/publisher.html>`__ +* `Docutils Runtime Settings <api/runtime-settings.html>`__ +* `Docutils Transforms <api/transforms.html>`__ + +The `Docutils Design Specification`_ (PEP 258) is a must-read for any +Docutils developer. + + +.. _howto: + +Instructions for Developers +=========================== + +:Security: `Deploying Docutils Securely <howto/security.html>`__ + +* `Inside A Docutils Command-Line Front-End Tool <howto/cmdline-tool.html>`__ +* `Writing HTML (CSS) Stylesheets for Docutils + <howto/html-stylesheets.html>`__ +* `Docutils Internationalization <howto/i18n.html>`__ +* `Creating reStructuredText Directives <howto/rst-directives.html>`__ +* `Creating reStructuredText Interpreted Text Roles + <howto/rst-roles.html>`__ + + +.. _dev: + +Development Notes and Plans for Core-Developers +=============================================== + +Docutils-general: + * `Docutils Hacker's Guide <dev/hacking.html>`__ + * `Docutils Distributor's Guide <dev/distributing.html>`__ + * `Docutils To Do List <dev/todo.html>`__ + * `Docutils Project Policies <dev/policies.html>`__ + * `Docutils Web Site <dev/website.html>`__ + * `Docutils Release Procedure <dev/release.html>`__ + * `The Docutils Subversion Repository <dev/repository.html>`__ + * `Docutils Testing <dev/testing.html>`__ + * `Docstring Semantics <dev/semantics.html>`__ (incomplete) + * `Python Source Reader <dev/pysource.html>`_ (incomplete) + * `Docutils Python DTD <dev/pysource.dtd>`_ + * `Plan for Enthought API Documentation Tool <dev/enthought-plan.html>`_ + * `Enthought API Documentation Tool RFP <dev/enthought-rfp.html>`_ + +reStructuredText_: + * `A Record of reStructuredText Syntax Alternatives + <dev/rst/alternatives.html>`__ + * `Problems With StructuredText <dev/rst/problems.html>`__ + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0256.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0256.txt new file mode 100644 index 00000000..cc5a54c4 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0256.txt @@ -0,0 +1,303 @@ +PEP: 256 +Title: Docstring Processing System Framework +Version: $Revision$ +Last-Modified: $Date$ +Author: David Goodger <goodger@python.org> +Discussions-To: <doc-sig@python.org> +Status: Rejected +Type: Standards Track +Content-Type: text/x-rst +Created: 01-Jun-2001 +Post-History: 13-Jun-2001 + + +Rejection Notice +================ + +This proposal seems to have run out of steam. + + +Abstract +======== + +Python lends itself to inline documentation. With its built-in +docstring syntax, a limited form of `Literate Programming`_ is easy to +do in Python. However, there are no satisfactory standard tools for +extracting and processing Python docstrings. The lack of a standard +toolset is a significant gap in Python's infrastructure; this PEP aims +to fill the gap. + +The issues surrounding docstring processing have been contentious and +difficult to resolve. This PEP proposes a generic Docstring +Processing System (DPS) framework, which separates out the components +(program and conceptual), enabling the resolution of individual issues +either through consensus (one solution) or through divergence (many). +It promotes standard interfaces which will allow a variety of plug-in +components (input context readers, markup parsers, and output format +writers) to be used. + +The concepts of a DPS framework are presented independently of +implementation details. + + +Road Map to the Docstring PEPs +============================== + +There are many aspects to docstring processing. 
The "Docstring PEPs" +have broken up the issues in order to deal with each of them in +isolation, or as close as possible. The individual aspects and +associated PEPs are as follows: + +* Docstring syntax. PEP 287, "reStructuredText Docstring Format" + [#PEP-287]_, proposes a syntax for Python docstrings, PEPs, and + other uses. + +* Docstring semantics consist of at least two aspects: + + - Conventions: the high-level structure of docstrings. Dealt with + in PEP 257, "Docstring Conventions" [#PEP-257]_. + + - Methodology: rules for the informational content of docstrings. + Not addressed. + +* Processing mechanisms. This PEP (PEP 256) outlines the high-level + issues and specification of an abstract docstring processing system + (DPS). PEP 258, "Docutils Design Specification" [#PEP-258]_, is an + overview of the design and implementation of one DPS under + development. + +* Output styles: developers want the documentation generated from + their source code to look good, and there are many different ideas + about what that means. PEP 258 touches on "Stylist Transforms". + This aspect of docstring processing has yet to be fully explored. + +By separating out the issues, we can form consensus more easily +(smaller fights ;-), and accept divergence more readily. + + +Rationale +========= + +There are standard inline documentation systems for some other +languages. For example, Perl has POD_ ("Plain Old Documentation") and +Java has Javadoc_, but neither of these mesh with the Pythonic way. +POD syntax is very explicit, but takes after Perl in terms of +readability. Javadoc is HTML-centric; except for "``@field``" tags, +raw HTML is used for markup. There are also general tools such as +Autoduck_ and Web_ (Tangle & Weave), useful for multiple languages. 
+ +There have been many attempts to write auto-documentation systems +for Python (not an exhaustive list): + +- Marc-Andre Lemburg's doc.py_ + +- Daniel Larsson's pythondoc_ & gendoc_ + +- Doug Hellmann's HappyDoc_ + +- Laurence Tratt's Crystal (no longer available on the web) + +- Ka-Ping Yee's pydoc_ (pydoc.py is now part of the Python standard + library; see below) + +- Tony Ibbs' docutils_ (Tony has donated this name to the `Docutils + project`_) + +- Edward Loper's STminus_ formalization and related efforts + +These systems, each with different goals, have had varying degrees of +success. A problem with many of the above systems was over-ambition +combined with inflexibility. They provided a self-contained set of +components: a docstring extraction system, a markup parser, an +internal processing system and one or more output format writers with +a fixed style. Inevitably, one or more aspects of each system had +serious shortcomings, and they were not easily extended or modified, +preventing them from being adopted as standard tools. + +It has become clear (to this author, at least) that the "all or +nothing" approach cannot succeed, since no monolithic self-contained +system could possibly be agreed upon by all interested parties. A +modular component approach designed for extension, where components +may be multiply implemented, may be the only chance for success. +Standard inter-component APIs will make the DPS components +comprehensible without requiring detailed knowledge of the whole, +lowering the barrier for contributions, and ultimately resulting in a +rich and varied system. + +Each of the components of a docstring processing system should be +developed independently. A "best of breed" system should be chosen, +either merged from existing systems, and/or developed anew. This +system should be included in Python's standard library. 
+ + +PyDoc & Other Existing Systems +------------------------------ + +PyDoc became part of the Python standard library as of release 2.1. +It extracts and displays docstrings from within the Python interactive +interpreter, from the shell command line, and from a GUI window into a +web browser (HTML). Although a very useful tool, PyDoc has several +deficiencies, including: + +- In the case of the GUI/HTML, except for some heuristic hyperlinking + of identifier names, no formatting of the docstrings is done. They + are presented within ``<p><small><tt>`` tags to avoid unwanted line + wrapping. Unfortunately, the result is not attractive. + +- PyDoc extracts docstrings and structural information (class + identifiers, method signatures, etc.) from imported module objects. + There are security issues involved with importing untrusted code. + Also, information from the source is lost when importing, such as + comments, "additional docstrings" (string literals in non-docstring + contexts; see PEP 258 [#PEP-258]_), and the order of definitions. + +The functionality proposed in this PEP could be added to or used by +PyDoc when serving HTML pages. The proposed docstring processing +system's functionality is much more than PyDoc needs in its current +form. Either an independent tool will be developed (which PyDoc may +or may not use), or PyDoc could be expanded to encompass this +functionality and *become* the docstring processing system (or one +such system). That decision is beyond the scope of this PEP. + +Similarly for other existing docstring processing systems, their +authors may or may not choose compatibility with this framework. +However, if this framework is accepted and adopted as the Python +standard, compatibility will become an important consideration in +these systems' future. + + +Specification +============= + +The docstring processing system framework is broken up as follows: + +1. Docstring conventions. 
Documents issues such as: + + - What should be documented where. + + - First line is a one-line synopsis. + + PEP 257 [#PEP-257]_ documents some of these issues. + +2. Docstring processing system design specification. Documents + issues such as: + + - High-level spec: what a DPS does. + + - Command-line interface for executable script. + + - System Python API. + + - Docstring extraction rules. + + - Readers, which encapsulate the input context. + + - Parsers. + + - Document tree: the intermediate internal data structure. The + output of the Parser and Reader, and the input to the Writer all + share the same data structure. + + - Transforms, which modify the document tree. + + - Writers for output formats. + + - Distributors, which handle output management (one file, many + files, or objects in memory). + + These issues are applicable to any docstring processing system + implementation. PEP 258 [#PEP-258]_ documents these issues. + +3. Docstring processing system implementation. + +4. Input markup specifications: docstring syntax. PEP 287 [#PEP-287]_ + proposes a standard syntax. + +5. Input parser implementations. + +6. Input context readers ("modes": Python source code, PEP, standalone + text file, email, etc.) and implementations. + +7. Stylists: certain input context readers may have associated + stylists which allow for a variety of output document styles. + +8. Output formats (HTML, XML, TeX, DocBook, info, etc.) and writer + implementations. + +Components 1, 2/3/5, and 4 are the subject of individual companion +PEPs. If there is another implementation of the framework or +syntax/parser, additional PEPs may be required. Multiple +implementations of each of components 6 and 7 will be required; the +PEP mechanism may be overkill for these components. + + +Project Web Site +================ + +A SourceForge project has been set up for this work at +https://docutils.sourceforge.io/. + + +References and Footnotes +======================== + +.. 
[#PEP-287] PEP 287, reStructuredText Docstring Format, Goodger + (http://www.python.org/peps/pep-0287.html) + +.. [#PEP-257] PEP 257, Docstring Conventions, Goodger, Van Rossum + (http://www.python.org/peps/pep-0257.html) + +.. [#PEP-258] PEP 258, Docutils Design Specification, Goodger + (http://www.python.org/peps/pep-0258.html) + +.. _Literate Programming: http://www.literateprogramming.com/ + +.. _POD: http://perldoc.perl.org/perlpod.html + +.. _Javadoc: http://java.sun.com/j2se/javadoc/ + +.. _Autoduck: + http://www.helpmaster.com/hlp-developmentaids-autoduck.htm + +.. _Web: http://www-cs-faculty.stanford.edu/~knuth/cweb.html + +.. _doc.py: + http://www.egenix.com/files/python/SoftwareDescriptions.html#doc.py + +.. _pythondoc: +.. _gendoc: http://starship.python.net/crew/danilo/pythondoc/ + +.. _HappyDoc: http://happydoc.sourceforge.net/ + +.. _pydoc: http://www.python.org/doc/current/lib/module-pydoc.html + +.. _docutils: http://www.tibsnjoan.co.uk/docutils.html + +.. _Docutils project: https://docutils.sourceforge.io/ + +.. _STMinus: http://www.cis.upenn.edu/~edloper/pydoc/ + +.. _Python Doc-SIG: http://www.python.org/sigs/doc-sig/ + + +Copyright +========= + +This document has been placed in the public domain. + + +Acknowledgements +================ + +This document borrows ideas from the archives of the `Python +Doc-SIG`_. Thanks to all members past & present. + + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0257.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0257.txt new file mode 100644 index 00000000..1acacbd6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0257.txt @@ -0,0 +1,328 @@ +PEP: 257 +Title: Docstring Conventions +Version: $Revision$ +Last-Modified: $Date$ +Authors: David Goodger <goodger@python.org>, + Guido van Rossum <guido@python.org> +Discussions-To: doc-sig@python.org +Status: Active +Type: Informational +Content-Type: text/x-rst +Created: 29-May-2001 +Post-History: 13-Jun-2001 + + +Abstract +======== + +This PEP documents the semantics and conventions associated with +Python docstrings. + + +Rationale +========= + +The aim of this PEP is to standardize the high-level structure of +docstrings: what they should contain, and how to say it (without +touching on any markup syntax within docstrings). The PEP contains +conventions, not laws or syntax. + + "A universal convention supplies all of maintainability, clarity, + consistency, and a foundation for good programming habits too. + What it doesn't do is insist that you follow it against your will. + That's Python!" + + -- Tim Peters on comp.lang.python, 2001-06-16 + +If you violate these conventions, the worst you'll get is some dirty +looks. But some software (such as the Docutils_ docstring processing +system [1]_ [2]_) will be aware of the conventions, so following them +will get you the best results. + + +Specification +============= + +What is a Docstring? +-------------------- + +A docstring is a string literal that occurs as the first statement in +a module, function, class, or method definition. Such a docstring +becomes the ``__doc__`` special attribute of that object. 
+ +All modules should normally have docstrings, and all functions and +classes exported by a module should also have docstrings. Public +methods (including the ``__init__`` constructor) should also have +docstrings. A package may be documented in the module docstring of +the ``__init__.py`` file in the package directory. + +String literals occurring elsewhere in Python code may also act as +documentation. They are not recognized by the Python bytecode +compiler and are not accessible as runtime object attributes (i.e. not +assigned to ``__doc__``), but two types of extra docstrings may be +extracted by software tools: + +1. String literals occurring immediately after a simple assignment at + the top level of a module, class, or ``__init__`` method are called + "attribute docstrings". + +2. String literals occurring immediately after another docstring are + called "additional docstrings". + +Please see PEP 258, "Docutils Design Specification" [2]_, for a +detailed description of attribute and additional docstrings. + +XXX Mention docstrings of 2.2 properties. + +For consistency, always use ``"""triple double quotes"""`` around +docstrings. Use ``r"""raw triple double quotes"""`` if you use any +backslashes in your docstrings. For Unicode docstrings, use +``u"""Unicode triple-quoted strings"""``. + +There are two forms of docstrings: one-liners and multi-line +docstrings. + + +One-line Docstrings +-------------------- + +One-liners are for really obvious cases. They should really fit on +one line. For example:: + + def kos_root(): + """Return the pathname of the KOS root directory.""" + global _kos_root + if _kos_root: return _kos_root + ... + +Notes: + +- Triple quotes are used even though the string fits on one line. + This makes it easy to later expand it. + +- The closing quotes are on the same line as the opening quotes. This + looks better for one-liners. + +- There's no blank line either before or after the docstring. 
+ +- The docstring is a phrase ending in a period. It prescribes the + function or method's effect as a command ("Do this", "Return that"), + not as a description; e.g. don't write "Returns the pathname ...". + +- The one-line docstring should NOT be a "signature" reiterating the + function/method parameters (which can be obtained by introspection). + Don't do:: + + def function(a, b): + """function(a, b) -> list""" + + This type of docstring is only appropriate for C functions (such as + built-ins), where introspection is not possible. However, the + nature of the *return value* cannot be determined by introspection, + so it should be mentioned. The preferred form for such a docstring + would be something like:: + + def function(a, b): + """Do X and return a list.""" + + (Of course "Do X" should be replaced by a useful description!) + + +Multi-line Docstrings +---------------------- + +Multi-line docstrings consist of a summary line just like a one-line +docstring, followed by a blank line, followed by a more elaborate +description. The summary line may be used by automatic indexing +tools; it is important that it fits on one line and is separated from +the rest of the docstring by a blank line. The summary line may be on +the same line as the opening quotes or on the next line. The entire +docstring is indented the same as the quotes at its first line (see +example below). + +Insert a blank line before and after all docstrings (one-line or +multi-line) that document a class -- generally speaking, the class's +methods are separated from each other by a single blank line, and the +docstring needs to be offset from the first method by a blank line; +for symmetry, put a blank line between the class header and the +docstring. 
Docstrings documenting functions or methods generally +don't have this requirement, unless the function or method's body is +written as a number of blank-line separated sections -- in this case, +treat the docstring as another section, and precede it with a blank +line. + +The docstring of a script (a stand-alone program) should be usable as +its "usage" message, printed when the script is invoked with incorrect +or missing arguments (or perhaps with a "-h" option, for "help"). +Such a docstring should document the script's function and command +line syntax, environment variables, and files. Usage messages can be +fairly elaborate (several screens full) and should be sufficient for a +new user to use the command properly, as well as a complete quick +reference to all options and arguments for the sophisticated user. + +The docstring for a module should generally list the classes, +exceptions and functions (and any other objects) that are exported by +the module, with a one-line summary of each. (These summaries +generally give less detail than the summary line in the object's +docstring.) The docstring for a package (i.e., the docstring of the +package's ``__init__.py`` module) should also list the modules and +subpackages exported by the package. + +The docstring for a function or method should summarize its behavior +and document its arguments, return value(s), side effects, exceptions +raised, and restrictions on when it can be called (all if applicable). +Optional arguments should be indicated. It should be documented +whether keyword arguments are part of the interface. + +The docstring for a class should summarize its behavior and list the +public methods and instance variables. If the class is intended to be +subclassed, and has an additional interface for subclasses, this +interface should be listed separately (in the docstring). The class +constructor should be documented in the docstring for its ``__init__`` +method. 
Individual methods should be documented by their own +docstring. + +If a class subclasses another class and its behavior is mostly +inherited from that class, its docstring should mention this and +summarize the differences. Use the verb "override" to indicate that a +subclass method replaces a superclass method and does not call the +superclass method; use the verb "extend" to indicate that a subclass +method calls the superclass method (in addition to its own behavior). + +*Do not* use the Emacs convention of mentioning the arguments of +functions or methods in upper case in running text. Python is case +sensitive and the argument names can be used for keyword arguments, so +the docstring should document the correct argument names. It is best +to list each argument on a separate line. For example:: + + def complex(real=0.0, imag=0.0): + """Form a complex number. + + Keyword arguments: + real -- the real part (default 0.0) + imag -- the imaginary part (default 0.0) + + """ + if imag == 0.0 and real == 0.0: return complex_zero + ... + +The BDFL [3]_ recommends inserting a blank line between the last +paragraph in a multi-line docstring and its closing quotes, placing +the closing quotes on a line by themselves. This way, Emacs' +``fill-paragraph`` command can be used on it. + + +Handling Docstring Indentation +------------------------------ + +Docstring processing tools will strip a uniform amount of indentation +from the second and further lines of the docstring, equal to the +minimum indentation of all non-blank lines after the first line. Any +indentation in the first line of the docstring (i.e., up to the first +newline) is insignificant and removed. Relative indentation of later +lines in the docstring is retained. Blank lines should be removed +from the beginning and end of the docstring. 
+ +Since code is much more precise than words, here is an implementation +of the algorithm:: + + def trim(docstring): + if not docstring: + return '' + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = docstring.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + indent = sys.maxint + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxint: + for line in lines[1:]: + trimmed.append(line[indent:].rstrip()) + # Strip off trailing and leading blank lines: + while trimmed and not trimmed[-1]: + trimmed.pop() + while trimmed and not trimmed[0]: + trimmed.pop(0) + # Return a single string: + return '\n'.join(trimmed) + +The docstring in this example contains two newline characters and is +therefore 3 lines long. The first and last lines are blank:: + + def foo(): + """ + This is the second line of the docstring. + """ + +To illustrate:: + + >>> print repr(foo.__doc__) + '\n This is the second line of the docstring.\n ' + >>> foo.__doc__.splitlines() + ['', ' This is the second line of the docstring.', ' '] + >>> trim(foo.__doc__) + 'This is the second line of the docstring.' + +Once trimmed, these docstrings are equivalent:: + + def foo(): + """A multi-line + docstring. + """ + + def bar(): + """ + A multi-line + docstring. + """ + + +References and Footnotes +======================== + +.. [1] PEP 256, Docstring Processing System Framework, Goodger + (http://www.python.org/peps/pep-0256.html) + +.. [2] PEP 258, Docutils Design Specification, Goodger + (http://www.python.org/peps/pep-0258.html) + +.. [3] Guido van Rossum, Python's creator and Benevolent Dictator For + Life. + +.. _Docutils: https://docutils.sourceforge.io/ + +.. _Python Style Guide: + http://www.python.org/doc/essays/styleguide.html + +.. 
_Doc-SIG: http://www.python.org/sigs/doc-sig/ + + +Copyright +========= + +This document has been placed in the public domain. + + +Acknowledgements +================ + +The "Specification" text comes mostly verbatim from the `Python Style +Guide`_ essay by Guido van Rossum. + +This document borrows ideas from the archives of the Python Doc-SIG_. +Thanks to all members past and present. + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + fill-column: 70 + sentence-end-double-space: t + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0258.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0258.txt new file mode 100644 index 00000000..c271e881 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0258.txt @@ -0,0 +1,1034 @@ +PEP: 258 +Title: Docutils Design Specification +Version: $Revision$ +Last-Modified: $Date$ +Author: David Goodger <goodger@python.org> +Discussions-To: <doc-sig@python.org> +Status: Rejected +Type: Standards Track +Content-Type: text/x-rst +Requires: 256, 257 +Created: 31-May-2001 +Post-History: 13-Jun-2001 + + +================ +Rejection Notice +================ + +While this may serve as an interesting design document for the +now-independent docutils, it is no longer slated for inclusion in the +standard library. + + +========== + Abstract +========== + +This PEP documents design issues and implementation details for +Docutils, a Python Docstring Processing System (DPS). The rationale +and high-level concepts of a DPS are documented in PEP 256, "Docstring +Processing System Framework" [#PEP-256]_. Also see PEP 256 for a +"Road Map to the Docstring PEPs". + +Docutils is being designed modularly so that any of its components can +be replaced easily. In addition, Docutils is not limited to the +processing of Python docstrings; it processes standalone documents as +well, in several contexts. 
+ +No changes to the core Python language are required by this PEP. Its +deliverables consist of a package for the standard library and its +documentation. + + +=============== + Specification +=============== + +Docutils Project Model +====================== + +Project components and data flow:: + + +---------------------------+ + | Docutils: | + | docutils.core.Publisher, | + | docutils.core.publish_*() | + +---------------------------+ + / | \ + / | \ + 1,3,5 / 6 | \ 7 + +--------+ +-------------+ +--------+ + | READER | ----> | TRANSFORMER | ====> | WRITER | + +--------+ +-------------+ +--------+ + / \\ | + / \\ | + 2 / 4 \\ 8 | + +-------+ +--------+ +--------+ + | INPUT | | PARSER | | OUTPUT | + +-------+ +--------+ +--------+ + +The numbers above each component indicate the path a document's data +takes. Double-width lines between Reader & Parser and between +Transformer & Writer indicate that data sent along these paths should +be standard (pure & unextended) Docutils doc trees. Single-width +lines signify that internal tree extensions or completely unrelated +representations are possible, but they must be supported at both ends. + + +Publisher +--------- + +The ``docutils.core`` module contains a "Publisher" facade class and +several convenience functions: "publish_cmdline()" (for command-line +front ends), "publish_file()" (for programmatic use with file-like +I/O), and "publish_string()" (for programmatic use with string I/O). +The Publisher class encapsulates the high-level logic of a Docutils +system. The Publisher class has overall responsibility for +processing, controlled by the ``Publisher.publish()`` method: + +1. Set up internal settings (may include config files & command-line + options) and I/O objects. + +2. Call the Reader object to read data from the source Input object + and parse the data with the Parser object. A document object is + returned. + +3. Set up and apply transforms via the Transformer object attached to + the document. + +4. 
Call the Writer object which translates the document to the final + output format and writes the formatted data to the destination + Output object. Depending on the Output object, the output may be + returned from the Writer, and then from the ``publish()`` method. + +Calling the "publish" function (or instantiating a "Publisher" object) +with component names will result in default behavior. For custom +behavior (customizing component settings), create custom component +objects first, and pass *them* to the Publisher or ``publish_*`` +convenience functions. + + +Readers +------- + +Readers understand the input context (where the data is coming from), +send the whole input or discrete "chunks" to the parser, and provide +the context to bind the chunks together back into a cohesive whole. + +Each reader is a module or package exporting a "Reader" class with a +"read" method. The base "Reader" class can be found in the +``docutils/readers/__init__.py`` module. + +Most Readers will have to be told what parser to use. So far (see the +list of examples below), only the Python Source Reader ("PySource"; +still incomplete) will be able to determine the parser on its own. + +Responsibilities: + +* Get input text from the source I/O. + +* Pass the input text to the parser, along with a fresh `document + tree`_ root. + +Examples: + +* Standalone (Raw/Plain): Just read a text file and process it. + The reader needs to be told which parser to use. + + The "Standalone Reader" has been implemented in module + ``docutils.readers.standalone``. + +* Python Source: See `Python Source Reader`_ below. This Reader is + currently in development in the Docutils sandbox. + +* Email: RFC-822 headers, quoted excerpts, signatures, MIME parts. + +* PEP: RFC-822 headers, "PEP xxxx" and "RFC xxxx" conversion to URIs. + The "PEP Reader" has been implemented in module + ``docutils.readers.pep``; see PEP 287 and PEP 12. 
+ +* Wiki: Global reference lookups of "wiki links" incorporated into + transforms. (CamelCase only or unrestricted?) Lazy + indentation? + +* Web Page: As standalone, but recognize meta fields as meta tags. + Support for templates of some sort? (After ``<body>``, before + ``</body>``?) + +* FAQ: Structured "question & answer(s)" constructs. + +* Compound document: Merge chapters into a book. Master manifest + file? + + +Parsers +------- + +Parsers analyze their input and produce a Docutils `document tree`_. +They don't know or care anything about the source or destination of +the data. + +Each input parser is a module or package exporting a "Parser" class +with a "parse" method. The base "Parser" class can be found in the +``docutils/parsers/__init__.py`` module. + +Responsibilities: Given raw input text and a doctree root node, +populate the doctree by parsing the input text. + +Example: The only parser implemented so far is for the +reStructuredText markup. It is implemented in the +``docutils/parsers/rst/`` package. + +The development and integration of other parsers is possible and +encouraged. + + +.. _transforms: + +Transformer +----------- + +The Transformer class, in ``docutils/transforms/__init__.py``, stores +transforms and applies them to documents. A transformer object is +attached to every new document tree. The Publisher_ calls +``Transformer.apply_transforms()`` to apply all stored transforms to +the document tree. Transforms change the document tree from one form +to another, add to the tree, or prune it. Transforms resolve +references and footnote numbers, process interpreted text, and do +other context-sensitive processing. + +Some transforms are specific to components (Readers, Parser, Writers, +Input, Output). Standard component-specific transforms are specified +in the ``default_transforms`` attribute of component classes. 
After +the Reader has finished processing, the Publisher_ calls +``Transformer.populate_from_components()`` with a list of components +and all default transforms are stored. + +Each transform is a class in a module in the ``docutils/transforms/`` +package, a subclass of ``docutils.transforms.Transform``. Transform +classes each have a ``default_priority`` attribute which is used by +the Transformer to apply transforms in order (low to high). The +default priority can be overridden when adding transforms to the +Transformer object. + +Transformer responsibilities: + +* Apply transforms to the document tree, in priority order. + +* Store a mapping of component type name ('reader', 'writer', etc.) to + component objects. These are used by certain transforms (such as + "components.Filter") to determine suitability. + +Transform responsibilities: + +* Modify a doctree in-place, either purely transforming one structure + into another, or adding new structures based on the doctree and/or + external data. + +Examples of transforms (in the ``docutils/transforms/`` package): + +* frontmatter.DocInfo: Conversion of document metadata (bibliographic + information). + +* references.AnonymousHyperlinks: Resolution of anonymous references + to corresponding targets. + +* parts.Contents: Generates a table of contents for a document. + +* document.Merger: Combining multiple populated doctrees into one. + (Not yet implemented or fully understood.) + +* document.Splitter: Splits a document into a tree-structure of + subdocuments, perhaps by section. It will have to transform + references appropriately. (Neither implemented nor remotely + understood.) + +* components.Filter: Includes or excludes elements which depend on a + specific Docutils component. + + +Writers +------- + +Writers produce the final output (HTML, XML, TeX, etc.). Writers +translate the internal `document tree`_ structure into the final data +format, possibly running Writer-specific transforms_ first.
+ +By the time the document gets to the Writer, it should be in final +form. The Writer's job is simply (and only) to translate from the +Docutils doctree structure to the target format. Some small +transforms may be required, but they should be local and +format-specific. + +Each writer is a module or package exporting a "Writer" class with a +"write" method. The base "Writer" class can be found in the +``docutils/writers/__init__.py`` module. + +Responsibilities: + +* Translate doctree(s) into specific output formats. + + - Transform references into format-native forms. + +* Write the translated output to the destination I/O. + +Examples: + +* XML: Various forms, such as: + + - Docutils XML (an expression of the internal document tree, + implemented as ``docutils.writers.docutils_xml``). + + - DocBook (being implemented in the Docutils sandbox). + +* HTML (XHTML 1.0 transitional implemented as ``docutils.writers.html4css1``). + +* PDF (a ReportLab interface is being developed in the Docutils + sandbox). + +* LaTeX (implemented as ``docutils.writers.latex2e``). + +* Docutils-native pseudo-XML (implemented as + ``docutils.writers.pseudoxml``, used for testing). + +* Plain text + +* reStructuredText? + + +Input/Output +------------ + +I/O classes provide a uniform API for low-level input and output. +Subclasses will exist for a variety of input/output mechanisms. +However, they can be considered an implementation detail. Most +applications should be satisfied using one of the convenience +functions associated with the Publisher_. + +I/O classes are currently in the preliminary stages; there's a lot of +work yet to be done. Issues: + +* How to represent multi-file input (files & directories) in the API? + +* How to represent multi-file output? Perhaps "Writer" variants, one + for each output distribution type? Or Output objects with + associated transforms?
+ +Responsibilities: + +* Read data from the input source (Input objects) or write data to the + output destination (Output objects). + +Examples of input sources: + +* A single file on disk or a stream (implemented as + ``docutils.io.FileInput``). + +* Multiple files on disk (``MultiFileInput``?). + +* Python source files: modules and packages. + +* Python strings, as received from a client application + (implemented as ``docutils.io.StringInput``). + +Examples of output destinations: + +* A single file on disk or a stream (implemented as + ``docutils.io.FileOutput``). + +* A tree of directories and files on disk. + +* A Python string, returned to a client application (implemented as + ``docutils.io.StringOutput``). + +* No output; useful for programmatic applications where only a portion + of the normal output is to be used (implemented as + ``docutils.io.NullOutput``). + +* A single tree-shaped data structure in memory. + +* Some other set of data structures in memory. + + +Docutils Package Structure +========================== + +* Package "docutils". + + - Module "__init__.py" contains: class "Component", a base class for + Docutils components; class "SettingsSpec", a base class for + specifying runtime settings (used by docutils.frontend); and class + "TransformSpec", a base class for specifying transforms. + + - Module "docutils.core" contains facade class "Publisher" and + convenience functions. See `Publisher`_ above. + + - Module "docutils.frontend" provides runtime settings support, for + programmatic use and front-end tools (including configuration file + support, and command-line argument and option processing). + + - Module "docutils.io" provides a uniform API for low-level input + and output. See `Input/Output`_ above. + + - Module "docutils.nodes" contains the Docutils document tree + element class library plus tree-traversal Visitor pattern base + classes. See `Document Tree`_ below. 
+ + - Module "docutils.statemachine" contains a finite state machine + specialized for regular-expression-based text filters and parsers. + The reStructuredText parser implementation is based on this + module. + + - Module "docutils.urischemes" contains a mapping of known URI + schemes ("http", "ftp", "mail", etc.). + + - Module "docutils.utils" contains utility functions and classes, + including a logger class ("Reporter"; see `Error Handling`_ + below). + + - Package "docutils.parsers": markup parsers_. + + - Function "get_parser_class(parser_name)" returns a parser module + by name. Class "Parser" is the base class of specific parsers. + (``docutils/parsers/__init__.py``) + + - Package "docutils.parsers.rst": the reStructuredText parser. + + - Alternate markup parsers may be added. + + See `Parsers`_ above. + + - Package "docutils.readers": context-aware input readers. + + - Function "get_reader_class(reader_name)" returns a reader module + by name or alias. Class "Reader" is the base class of specific + readers. (``docutils/readers/__init__.py``) + + - Module "docutils.readers.standalone" reads independent document + files. + + - Module "docutils.readers.pep" reads PEPs (Python Enhancement + Proposals). + + - Module "docutils.readers.doctree" is used to re-read a + previously stored document tree for reprocessing. + + - Readers to be added for: Python source code (structure & + docstrings), email, FAQ, and perhaps Wiki and others. + + See `Readers`_ above. + + - Package "docutils.writers": output format writers. + + - Function "get_writer_class(writer_name)" returns a writer module + by name. Class "Writer" is the base class of specific writers. + (``docutils/writers/__init__.py``) + + - Package "docutils.writers.html4css1" is a simple HyperText + Markup Language document tree writer for HTML 4.01 and CSS1. + + - Package "docutils.writers.pep_html" generates HTML from + reStructuredText PEPs. 
+ + - Package "docutils.writers.s5_html" generates S5/HTML slide + shows. + + - Package "docutils.writers.latex2e" writes LaTeX. + + - Package "docutils.writers.newlatex2e" also writes LaTeX; it is a + new implementation. + + - Module "docutils.writers.docutils_xml" writes the internal + document tree in XML form. + + - Module "docutils.writers.pseudoxml" is a simple internal + document tree writer; it writes indented pseudo-XML. + + - Module "docutils.writers.null" is a do-nothing writer; it is + used for specialized purposes such as storing the internal + document tree. + + - Writers to be added: HTML 3.2 or 4.01-loose, XML (various forms, + such as DocBook), PDF, plaintext, reStructuredText, and perhaps + others. + + Subpackages of "docutils.writers" contain modules and data files + (such as stylesheets) that support the individual writers. + + See `Writers`_ above. + + - Package "docutils.transforms": tree transform classes. + + - Class "Transformer" stores transforms and applies them to + document trees. (``docutils/transforms/__init__.py``) + + - Class "Transform" is the base class of specific transforms. + (``docutils/transforms/__init__.py``) + + - Each module contains related transform classes. + + See `Transforms`_ above. + + - Package "docutils.languages": Language modules contain + language-dependent strings and mappings. They are named for their + language identifier (as defined in `Choice of Docstring Format`_ + below), converting dashes to underscores. + + - Function "get_language(language_code)", returns matching + language module. (``docutils/languages/__init__.py``) + + - Modules: en.py (English), de.py (German), fr.py (French), it.py + (Italian), sk.py (Slovak), sv.py (Swedish). + + - Other languages to be added. + +* Third-party modules: "extras" directory. These modules are + installed only if they're not already present in the Python + installation. + + - ``extras/roman.py`` contains Roman numeral conversion routines. 
+ + +Front-End Tools +=============== + +The ``tools/`` directory contains several front ends for common +Docutils processing. See `Docutils Front-End Tools`_ for details. + +.. _Docutils Front-End Tools: + https://docutils.sourceforge.io/docs/user/tools.html + + +Document Tree +============= + +A single intermediate data structure is used internally by Docutils, +in the interfaces between components; it is defined in the +``docutils.nodes`` module. It is not required that this data +structure be used *internally* by any of the components, just +*between* components as outlined in the diagram in the `Docutils +Project Model`_ above. + +Custom node types are allowed, provided that either (a) a transform +converts them to standard Docutils nodes before they reach the Writer +proper, or (b) the custom node is explicitly supported by certain +Writers, and is wrapped in a filtered "pending" node. An example of +condition (a) is the `Python Source Reader`_ (see below), where a +"stylist" transform converts custom nodes. The HTML ``<meta>`` tag is +an example of condition (b); it is supported by the HTML Writer but +not by others. The reStructuredText "meta" directive creates a +"pending" node, which contains knowledge that the embedded "meta" node +can only be handled by HTML-compatible writers. The "pending" node is +resolved by the ``docutils.transforms.components.Filter`` transform, +which checks that the calling writer supports HTML; if it doesn't, the +"pending" node (and enclosed "meta" node) is removed from the +document. + +The document tree data structure is similar to a DOM tree, but with +specific node names (classes) instead of DOM's generic nodes. The +schema is documented in an XML DTD (eXtensible Markup Language +Document Type Definition), which comes in two parts: + +* the Docutils Generic DTD, docutils.dtd_, and + +* the OASIS Exchange Table Model, soextbl.dtd_. + +The DTD defines a rich set of elements, suitable for many input and +output formats. 
The DTD retains all information necessary to +reconstruct the original input text, or a reasonable facsimile +thereof. + +See `The Docutils Document Tree`_ for details (incomplete). + + +Error Handling +============== + +When the parser encounters an error in markup, it inserts a system +message (DTD element "system_message"). There are five levels of +system messages: + +* Level-0, "DEBUG": an internal reporting issue. There is no effect + on the processing. Level-0 system messages are handled separately + from the others. + +* Level-1, "INFO": a minor issue that can be ignored. There is little + or no effect on the processing. Typically level-1 system messages + are not reported. + +* Level-2, "WARNING": an issue that should be addressed. If ignored, + there may be minor problems with the output. Typically level-2 + system messages are reported but do not halt processing. + +* Level-3, "ERROR": a major issue that should be addressed. If + ignored, the output will contain unpredictable errors. Typically + level-3 system messages are reported but do not halt processing. + +* Level-4, "SEVERE": a critical error that must be addressed. + Typically level-4 system messages are turned into exceptions which + do halt processing. If ignored, the output will contain severe + errors. + +Although the initial message levels were devised independently, they +have a strong correspondence to `VMS error condition severity +levels`_; the names in quotes for levels 1 through 4 were borrowed +from VMS. Error handling has since been influenced by the `log4j +project`_. + + +Python Source Reader +==================== + +The Python Source Reader ("PySource") is the Docutils component that +reads Python source files, extracts docstrings in context, then +parses, links, and assembles the docstrings into a cohesive whole. It +is a major and non-trivial component, currently under experimental +development in the Docutils sandbox. High-level design issues are +presented here. 
+ + +Processing Model +---------------- + +This model will evolve over time, incorporating experience and +discoveries. + +1. The PySource Reader uses an Input class to read in Python packages + and modules, into a tree of strings. + +2. The Python modules are parsed, converting the tree of strings into + a tree of abstract syntax trees with docstring nodes. + +3. The abstract syntax trees are converted into an internal + representation of the packages/modules. Docstrings are extracted, + as well as code structure details. See `AST Mining`_ below. + Namespaces are constructed for lookup in step 6. + +4. One at a time, the docstrings are parsed, producing standard + Docutils doctrees. + +5. PySource assembles all the individual docstrings' doctrees into a + Python-specific custom Docutils tree paralleling the + package/module/class structure; this is a custom Reader-specific + internal representation (see the `Docutils Python Source DTD`_). + Namespaces must be merged: Python identifiers, hyperlink targets. + +6. Cross-references from docstrings (interpreted text) to Python + identifiers are resolved according to the Python namespace lookup + rules. See `Identifier Cross-References`_ below. + +7. A "Stylist" transform is applied to the custom doctree (by the + Transformer_), custom nodes are rendered using standard nodes as + primitives, and a standard document tree is emitted. See `Stylist + Transforms`_ below. + +8. Other transforms are applied to the standard doctree by the + Transformer_. + +9. The standard doctree is sent to a Writer, which translates the + document into a concrete format (HTML, PDF, etc.). + +10. The Writer uses an Output class to write the resulting data to its + destination (disk file, directories and files, etc.). 
+ + +AST Mining +---------- + +Abstract Syntax Tree mining code will be written (or adapted) that +scans a parsed Python module, and returns an ordered tree containing +the names, docstrings (including attribute and additional docstrings; +see below), and additional info (in parentheses below) of all of the +following objects: + +* packages +* modules +* module attributes (+ initial values) +* classes (+ inheritance) +* class attributes (+ initial values) +* instance attributes (+ initial values) +* methods (+ parameters & defaults) +* functions (+ parameters & defaults) + +(Extract comments too? For example, comments at the start of a module +would be a good place for bibliographic field lists.) + +In order to evaluate interpreted text cross-references, namespaces for +each of the above will also be required. + +See the python-dev/docstring-develop thread "AST mining", started on +2001-08-14. + + +Docstring Extraction Rules +-------------------------- + +1. What to examine: + + a) If the "``__all__``" variable is present in the module being + documented, only identifiers listed in "``__all__``" are + examined for docstrings. + + b) In the absence of "``__all__``", all identifiers are examined, + except those whose names are private (names begin with "_" but + don't begin and end with "__"). + + c) 1a and 1b can be overridden by runtime settings. + +2. Where: + + Docstrings are string literal expressions, and are recognized in + the following places within Python modules: + + a) At the beginning of a module, function definition, class + definition, or method definition, after any comments. This is + the standard for Python ``__doc__`` attributes. + + b) Immediately following a simple assignment at the top level of a + module, class definition, or ``__init__`` method definition, + after any comments. See `Attribute Docstrings`_ below. 
+ + c) Additional string literals found immediately after the + docstrings in (a) and (b) will be recognized, extracted, and + concatenated. See `Additional Docstrings`_ below. + + d) @@@ 2.2-style "properties" with attribute docstrings? Wait for + syntax? + +3. How: + + Whenever possible, Python modules should be parsed by Docutils, not + imported. There are several reasons: + + - Importing untrusted code is inherently insecure. + + - Information from the source is lost when using introspection to + examine an imported module, such as comments and the order of + definitions. + + - Docstrings are to be recognized in places where the byte-code + compiler ignores string literal expressions (2b and 2c above), + meaning importing the module will lose these docstrings. + + Of course, standard Python parsing tools such as the "parser" + library module should be used. + + When the Python source code for a module is not available + (i.e. only the ``.pyc`` file exists) or for C extension modules, to + access docstrings the module can only be imported, and any + limitations must be lived with. + +Since attribute docstrings and additional docstrings are ignored by +the Python byte-code compiler, no namespace pollution or runtime bloat +will result from their use. They are not assigned to ``__doc__`` or +to any other attribute. The initial parsing of a module may take a +slight performance hit. + + +Attribute Docstrings +'''''''''''''''''''' + +(This is a simplified version of PEP 224 [#PEP-224]_.) + +A string literal immediately following an assignment statement is +interpreted by the docstring extraction machinery as the docstring of +the target of the assignment statement, under the following +conditions: + +1. The assignment must be in one of the following contexts: + + a) At the top level of a module (i.e., not nested inside a compound + statement such as a loop or conditional): a module attribute. + + b) At the top level of a class definition: a class attribute. 
+ + c) At the top level of the "``__init__``" method definition of a + class: an instance attribute. Instance attributes assigned in + other methods are assumed to be implementation details. (@@@ + ``__new__`` methods?) + + d) A function attribute assignment at the top level of a module or + class definition. + + Since each of the above contexts are at the top level (i.e., in the + outermost suite of a definition), it may be necessary to place + dummy assignments for attributes assigned conditionally or in a + loop. + +2. The assignment must be to a single target, not to a list or a tuple + of targets. + +3. The form of the target: + + a) For contexts 1a and 1b above, the target must be a simple + identifier (not a dotted identifier, a subscripted expression, + or a sliced expression). + + b) For context 1c above, the target must be of the form + "``self.attrib``", where "``self``" matches the "``__init__``" + method's first parameter (the instance parameter) and "attrib" + is a simple identifier as in 3a. + + c) For context 1d above, the target must be of the form + "``name.attrib``", where "``name``" matches an already-defined + function or method name and "attrib" is a simple identifier as + in 3a. + +Blank lines may be used after attribute docstrings to emphasize the +connection between the assignment and the docstring. + +Examples:: + + g = 'module attribute (module-global variable)' + """This is g's docstring.""" + + class AClass: + + c = 'class attribute' + """This is AClass.c's docstring.""" + + def __init__(self): + """Method __init__'s docstring.""" + + self.i = 'instance attribute' + """This is self.i's docstring.""" + + def f(x): + """Function f's docstring.""" + return x**2 + + f.a = 1 + """Function attribute f.a's docstring.""" + + +Additional Docstrings +''''''''''''''''''''' + +(This idea was adapted from PEP 216 [#PEP-216]_.) + +Many programmers would like to make extensive use of docstrings for +API documentation. 
However, docstrings do take up space in the +running program, so some programmers are reluctant to "bloat up" their +code. Also, not all API documentation is applicable to interactive +environments, where ``__doc__`` would be displayed. + +Docutils' docstring extraction tools will concatenate all string +literal expressions which appear at the beginning of a definition or +after a simple assignment. Only the first strings in definitions will +be available as ``__doc__``, and can be used for brief usage text +suitable for interactive sessions; subsequent string literals and all +attribute docstrings are ignored by the Python byte-code compiler and +may contain more extensive API information. + +Example:: + + def function(arg): + """This is __doc__, function's docstring.""" + """ + This is an additional docstring, ignored by the byte-code + compiler, but extracted by Docutils. + """ + pass + +.. topic:: Issue: ``from __future__ import`` + + This would break "``from __future__ import``" statements introduced + in Python 2.1 for multiple module docstrings (main docstring plus + additional docstring(s)). The Python Reference Manual specifies: + + A future statement must appear near the top of the module. The + only lines that can appear before a future statement are: + + * the module docstring (if any), + * comments, + * blank lines, and + * other future statements. + + Resolution? + + 1. Should we search for docstrings after a ``__future__`` + statement? Very ugly. + + 2. Redefine ``__future__`` statements to allow multiple preceding + string literals? + + 3. Or should we not even worry about this? There probably + shouldn't be ``__future__`` statements in production code, after + all. Perhaps modules with ``__future__`` statements will simply + have to put up with the single-docstring limitation. 
+ + +Choice of Docstring Format +-------------------------- + +Rather than force everyone to use a single docstring format, multiple +input formats are allowed by the processing system. A special +variable, ``__docformat__``, may appear at the top level of a module +before any function or class definitions. Over time or through +decree, a standard format or set of formats should emerge. + +A module's ``__docformat__`` variable only applies to the objects +defined in the module's file. In particular, the ``__docformat__`` +variable in a package's ``__init__.py`` file does not apply to objects +defined in subpackages and submodules. + +The ``__docformat__`` variable is a string containing the name of the +format being used, a case-insensitive string matching the input +parser's module or package name (i.e., the same name as required to +"import" the module or package), or a registered alias. If no +``__docformat__`` is specified, the default format is "plaintext" for +now; this may be changed to the standard format if one is ever +established. + +The ``__docformat__`` string may contain an optional second field, +separated from the format name (first field) by a single space: a +case-insensitive language identifier as defined in RFC 1766. A +typical language identifier consists of a 2-letter language code from +`ISO 639`_ (3-letter codes used only if no 2-letter code exists; RFC +1766 is currently being revised to allow 3-letter codes). If no +language identifier is specified, the default is "en" for English. +The language identifier is passed to the parser and can be used for +language-dependent markup features. + + +Identifier Cross-References +--------------------------- + +In Python docstrings, interpreted text is used to classify and mark up +program identifiers, such as the names of variables, functions, +classes, and modules. If the identifier alone is given, its role is +inferred implicitly according to the Python namespace lookup rules. 
+For functions and methods (even when dynamically assigned), +parentheses ('()') may be included:: + + This function uses `another()` to do its work. + +For class, instance and module attributes, dotted identifiers are used +when necessary. For example (using reStructuredText markup):: + + class Keeper(Storer): + + """ + Extend `Storer`. Class attribute `instances` keeps track + of the number of `Keeper` objects instantiated. + """ + + instances = 0 + """How many `Keeper` objects are there?""" + + def __init__(self): + """ + Extend `Storer.__init__()` to keep track of instances. + + Keep count in `Keeper.instances`, data in `self.data`. + """ + Storer.__init__(self) + Keeper.instances += 1 + + self.data = [] + """Store data in a list, most recent last.""" + + def store_data(self, data): + """ + Extend `Storer.store_data()`; append new `data` to a + list (in `self.data`). + """ + self.data = data + +Each of the identifiers quoted with backquotes ("`") will become +references to the definitions of the identifiers themselves. + + +Stylist Transforms +------------------ + +Stylist transforms are specialized transforms specific to the PySource +Reader. The PySource Reader doesn't have to make any decisions as to +style; it just produces a logically constructed document tree, parsed +and linked, including custom node types. Stylist transforms +understand the custom nodes created by the Reader and convert them +into standard Docutils nodes. + +Multiple Stylist transforms may be implemented and one can be chosen +at runtime (through a "--style" or "--stylist" command-line option). +Each Stylist transform implements a different layout or style; thus +the name. They decouple the context-understanding part of the Reader +from the layout-generating part of processing, resulting in a more +flexible and robust system. This also serves to "separate style from +content", the SGML/XML ideal. 
+ +By keeping the piece of code that does the styling small and modular, +it becomes much easier for people to roll their own styles. The +"barrier to entry" is too high with existing tools; extracting the +stylist code will lower the barrier considerably. + + +========================== + References and Footnotes +========================== + +.. [#PEP-256] PEP 256, Docstring Processing System Framework, Goodger + (http://www.python.org/peps/pep-0256.html) + +.. [#PEP-224] PEP 224, Attribute Docstrings, Lemburg + (http://www.python.org/peps/pep-0224.html) + +.. [#PEP-216] PEP 216, Docstring Format, Zadka + (http://www.python.org/peps/pep-0216.html) + +.. _docutils.dtd: + https://docutils.sourceforge.io/docs/ref/docutils.dtd + +.. _soextbl.dtd: + https://docutils.sourceforge.io/docs/ref/soextblx.dtd + +.. _The Docutils Document Tree: + https://docutils.sourceforge.io/docs/ref/doctree.html + +.. _VMS error condition severity levels: + http://www.openvms.compaq.com:8000/73final/5841/841pro_027.html + #error_cond_severity + +.. _log4j project: http://logging.apache.org/log4j/docs/index.html + +.. _Docutils Python Source DTD: + https://docutils.sourceforge.io/docs/dev/pysource.dtd + +.. _ISO 639: http://www.loc.gov/standards/iso639-2/englangn.html + +.. _Python Doc-SIG: http://www.python.org/sigs/doc-sig/ + + + +================== + Project Web Site +================== + +A SourceForge project has been set up for this work at +https://docutils.sourceforge.io/. + + +=========== + Copyright +=========== + +This document has been placed in the public domain. + + +================== + Acknowledgements +================== + +This document borrows ideas from the archives of the `Python +Doc-SIG`_. Thanks to all members past & present. + + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0287.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0287.txt new file mode 100644 index 00000000..3d9b60c2 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/peps/pep-0287.txt @@ -0,0 +1,815 @@ +PEP: 287 +Title: reStructuredText Docstring Format +Version: $Revision$ +Last-Modified: $Date$ +Author: David Goodger <goodger@python.org> +Discussions-To: <doc-sig@python.org> +Status: Draft +Type: Informational +Content-Type: text/x-rst +Created: 25-Mar-2002 +Post-History: 02-Apr-2002 +Replaces: 216 + + +Abstract +======== + +When plaintext hasn't been expressive enough for inline documentation, +Python programmers have sought out a format for docstrings. This PEP +proposes that the `reStructuredText markup`_ be adopted as a standard +markup format for structured plaintext documentation in Python +docstrings, and for PEPs and ancillary documents as well. +reStructuredText is a rich and extensible yet easy-to-read, +what-you-see-is-what-you-get plaintext markup syntax. + +Only the low-level syntax of docstrings is addressed here. This PEP +is not concerned with docstring semantics or processing at all (see +PEP 256 for a "Road Map to the Docstring PEPs"). Nor is it an attempt +to deprecate pure plaintext docstrings, which are always going to be +legitimate. The reStructuredText markup is an alternative for those +who want more expressive docstrings. + + +Benefits +======== + +Programmers are by nature a lazy breed. We reuse code with functions, +classes, modules, and subsystems. Through its docstring syntax, +Python allows us to document our code from within. 
The "holy grail" +of the Python Documentation Special Interest Group (Doc-SIG_) has been +a markup syntax and toolset to allow auto-documentation, where the +docstrings of Python systems can be extracted in context and processed +into useful, high-quality documentation for multiple purposes. + +Document markup languages have three groups of customers: the authors +who write the documents, the software systems that process the data, +and the readers, who are the final consumers and the most important +group. Most markups are designed for the authors and software +systems; readers are only meant to see the processed form, either on +paper or via browser software. ReStructuredText is different: it is +intended to be easily readable in source form, without prior knowledge +of the markup. ReStructuredText is entirely readable in plaintext +format, and many of the markup forms match common usage (e.g., +``*emphasis*``), so it reads quite naturally. Yet it is rich enough +to produce complex documents, and extensible so that there are few +limits. Of course, to write reStructuredText documents some prior +knowledge is required. + +The markup offers functionality and expressivity, while maintaining +easy readability in the source text. The processed form (HTML etc.) +makes it all accessible to readers: inline live hyperlinks; live links +to and from footnotes; automatic tables of contents (with live +links!); tables; images for diagrams etc.; pleasant, readable styled +text. + +The reStructuredText parser is available now, part of the Docutils_ +project. Standalone reStructuredText documents and PEPs can be +converted to HTML; other output format writers are being worked on and +will become available over time. Work is progressing on a Python +source "Reader" which will implement auto-documentation from +docstrings. 
Authors of existing auto-documentation tools are +encouraged to integrate the reStructuredText parser into their +projects, or better yet, to join forces to produce a world-class +toolset for the Python standard library. + +Tools will become available in the near future, which will allow +programmers to generate HTML for online help, XML for multiple +purposes, and eventually PDF, DocBook, and LaTeX for printed +documentation, essentially "for free" from the existing docstrings. +The adoption of a standard will, at the very least, benefit docstring +processing tools by preventing further "reinventing the wheel". + +Eventually PyDoc, the one existing standard auto-documentation tool, +could have reStructuredText support added. In the interim it will +have no problem with reStructuredText markup, since it treats all +docstrings as preformatted plaintext. + + +Goals +===== + +These are the generally accepted goals for a docstring format, as +discussed in the Doc-SIG: + +1. It must be readable in source form by the casual observer. + +2. It must be easy to type with any standard text editor. + +3. It must not need to contain information which can be deduced from + parsing the module. + +4. It must contain sufficient information (structure) so it can be + converted to any reasonable markup format. + +5. It must be possible to write a module's entire documentation in + docstrings, without feeling hampered by the markup language. + +reStructuredText meets and exceeds all of these goals, and sets its +own goals as well, even more stringent. See `Docstring-Significant +Features`_ below. + +The goals of this PEP are as follows: + +1. To establish reStructuredText as a standard structured plaintext + format for docstrings (inline documentation of Python modules and + packages), PEPs, README-type files and other standalone documents. + "Accepted" status will be sought through Python community consensus + and eventual BDFL pronouncement. 
+ + Please note that reStructuredText is being proposed as *a* + standard, not *the only* standard. Its use will be entirely + optional. Those who don't want to use it need not. + +2. To solicit and address any related concerns raised by the Python + community. + +3. To encourage community support. As long as multiple competing + markups are out there, the development community remains fractured. + Once a standard exists, people will start to use it, and momentum + will inevitably gather. + +4. To consolidate efforts from related auto-documentation projects. + It is hoped that interested developers will join forces and work on + a joint/merged/common implementation. + +Once reStructuredText is a Python standard, effort can be focused on +tools instead of arguing for a standard. Python needs a standard set +of documentation tools. + +With regard to PEPs, one or both of the following strategies may be +applied: + +a) Keep the existing PEP section structure constructs (one-line + section headers, indented body text). Subsections can either be + forbidden, or supported with reStructuredText-style underlined + headers in the indented body text. + +b) Replace the PEP section structure constructs with the + reStructuredText syntax. Section headers will require underlines, + subsections will be supported out of the box, and body text need + not be indented (except for block quotes). + +Strategy (b) is recommended, and its implementation is complete. + +Support for RFC 2822 headers has been added to the reStructuredText +parser for PEPs (unambiguous given a specific context: the first +contiguous block of the document). It may be desired to concretely +specify what over/underline styles are allowed for PEP section +headers, for uniformity. + + +Rationale +========= + +The lack of a standard syntax for docstrings has hampered the +development of standard tools for extracting and converting docstrings +into documentation in standard formats (e.g., HTML, DocBook, TeX). 
+There have been a number of proposed markup formats and variations, +and many tools tied to these proposals, but without a standard +docstring format they have failed to gain a strong following and/or +floundered half-finished. + +Throughout the existence of the Doc-SIG, consensus on a single +standard docstring format has never been reached. A lightweight, +implicit markup has been sought, for the following reasons (among +others): + +1. Docstrings written within Python code are available from within the + interactive interpreter, and can be "print"ed. Thus the use of + plaintext for easy readability. + +2. Programmers want to add structure to their docstrings, without + sacrificing raw docstring readability. Unadorned plaintext cannot + be transformed ("up-translated") into useful structured formats. + +3. Explicit markup (like XML or TeX) is widely considered unreadable + by the uninitiated. + +4. Implicit markup is aesthetically compatible with the clean and + minimalist Python syntax. + +Many alternative markups for docstrings have been proposed on the +Doc-SIG over the years; a representative sample is listed below. Each +is briefly analyzed in terms of the goals stated above. Please note +that this is *not* intended to be an exclusive list of all existing +markup systems; there are many other markups (Texinfo, Doxygen, TIM, +YODL, AFT, ...) which are not mentioned. + +- XML_, SGML_, DocBook_, HTML_, XHTML_ + + XML and SGML are explicit, well-formed meta-languages suitable for + all kinds of documentation. XML is a variant of SGML. They are + best used behind the scenes, because to untrained eyes they are + verbose, difficult to type, and too cluttered to read comfortably as + source. DocBook, HTML, and XHTML are all applications of SGML + and/or XML, and all share the same basic syntax and the same + shortcomings. + +- TeX_ + + TeX is similar to XML/SGML in that it's explicit, but not very easy + to write, and not easy for the uninitiated to read. 
+ +- `Perl POD`_ + + Most Perl modules are documented in a format called POD (Plain Old + Documentation). This is an easy-to-type, very low level format with + strong integration with the Perl parser. Many tools exist to turn + POD documentation into other formats: info, HTML and man pages, + among others. However, the POD syntax takes after Perl itself in + terms of readability. + +- JavaDoc_ + + Special comments before Java classes and functions serve to document + the code. A program to extract these, and turn them into HTML + documentation is called javadoc, and is part of the standard Java + distribution. However, JavaDoc has a very intimate relationship + with HTML, using HTML tags for most markup. Thus it shares the + readability problems of HTML. + +- Setext_, StructuredText_ + + Early on, variants of Setext (Structure Enhanced Text), including + Zope Corp's StructuredText, were proposed for Python docstring + formatting. Hereafter these variants will collectively be called + "STexts". STexts have the advantage of being easy to read without + special knowledge, and relatively easy to write. + + Although used by some (including in most existing Python + auto-documentation tools), until now STexts have failed to become + standard because: + + - STexts have been incomplete. Lacking "essential" constructs that + people want to use in their docstrings, STexts are rendered less + than ideal. Note that these "essential" constructs are not + universal; everyone has their own requirements. + + - STexts have been sometimes surprising. Bits of text are + unexpectedly interpreted as being marked up, leading to user + frustration. + + - SText implementations have been buggy. + + - Most STexts have had no formal specification except for the + implementation itself. A buggy implementation meant a buggy spec, + and vice-versa. + + - There has been no mechanism to get around the SText markup rules + when a markup character is used in a non-markup context.
In other + words, no way to escape markup. + +Proponents of implicit STexts have vigorously opposed proposals for +explicit markup (XML, HTML, TeX, POD, etc.), and the debates have +continued off and on since 1996 or earlier. + +reStructuredText is a complete revision and reinterpretation of the +SText idea, addressing all of the problems listed above. + + +Specification +============= + +The specification and user documentation for reStructuredText is +quite extensive. Rather than repeating or summarizing it all +here, links to the originals are provided. + +Please first take a look at `A ReStructuredText Primer`_, a short and +gentle introduction. The `Quick reStructuredText`_ user reference +quickly summarizes all of the markup constructs. For complete and +extensive details, please refer to the following documents: + +- `An Introduction to reStructuredText`_ + +- `reStructuredText Markup Specification`_ + +- `reStructuredText Directives`_ + +In addition, `Problems With StructuredText`_ explains many markup +decisions made with regards to StructuredText, and `A Record of +reStructuredText Syntax Alternatives`_ records markup decisions made +independently. + + +Docstring-Significant Features +============================== + +- A markup escaping mechanism. + + Backslashes (``\``) are used to escape markup characters when needed + for non-markup purposes. However, the inline markup recognition + rules have been constructed in order to minimize the need for + backslash-escapes. For example, although asterisks are used for + *emphasis*, in non-markup contexts such as "*" or "(*)" or "x * y", + the asterisks are not interpreted as markup and are left unchanged. + For many non-markup uses of backslashes (e.g., describing regular + expressions), inline literals or literal blocks are applicable; see + the next item. + +- Markup to include Python source code and Python interactive + sessions: inline literals, literal blocks, and doctest blocks.
+ + Inline literals use ``double-backquotes`` to indicate program I/O or + code snippets. No markup interpretation (including backslash-escape + [``\``] interpretation) is done within inline literals. + + Literal blocks (block-level literal text, such as code excerpts or + ASCII graphics) are indented, and indicated with a double-colon + ("::") at the end of the preceding paragraph (right here -->):: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + + Doctest blocks begin with ">>> " and end with a blank line. Neither + indentation nor literal block double-colons are required. For + example:: + + Here's a doctest block: + + >>> print 'Python-specific usage examples; begun with ">>>"' + Python-specific usage examples; begun with ">>>" + >>> print '(cut and pasted from interactive sessions)' + (cut and pasted from interactive sessions) + +- Markup that isolates a Python identifier: interpreted text. + + Text enclosed in single backquotes is recognized as "interpreted + text", whose interpretation is application-dependent. In the + context of a Python docstring, the default interpretation of + interpreted text is as Python identifiers. The text will be marked + up with a hyperlink connected to the documentation for the + identifier given. Lookup rules are the same as in Python itself: + LGB namespace lookups (local, global, builtin). The "role" of the + interpreted text (identifying a class, module, function, etc.) is + determined implicitly from the namespace lookup. For example:: + + class Keeper(Storer): + + """ + Keep data fresher longer. + + Extend `Storer`. Class attribute `instances` keeps track + of the number of `Keeper` objects instantiated. + """ + + instances = 0 + """How many `Keeper` objects are there?""" + + def __init__(self): + """ + Extend `Storer.__init__()` to keep track of + instances. Keep count in `self.instances` and data + in `self.data`. 
+ """ + Storer.__init__(self) + self.instances += 1 + + self.data = [] + """Store data in a list, most recent last.""" + + def storedata(self, data): + """ + Extend `Storer.storedata()`; append new `data` to a + list (in `self.data`). + """ + self.data = data + + Each piece of interpreted text is looked up according to the local + namespace of the block containing its docstring. + +- Markup that isolates a Python identifier and specifies its type: + interpreted text with roles. + + Although the Python source context reader is designed not to require + explicit roles, they may be used. To classify identifiers + explicitly, the role is given along with the identifier in either + prefix or suffix form:: + + Use :method:`Keeper.storedata` to store the object's data in + `Keeper.data`:instance_attribute:. + + The syntax chosen for roles is verbose, but necessarily so (if + anyone has a better alternative, please post it to the Doc-SIG_). + The intention of the markup is that there should be little need to + use explicit roles; their use is to be kept to an absolute minimum. + +- Markup for "tagged lists" or "label lists": field lists. + + Field lists represent a mapping from field name to field body. + These are mostly used for extension syntax, such as "bibliographic + field lists" (representing document metadata such as author, date, + and version) and extension attributes for directives (see below). + They may be used to implement methodologies (docstring semantics), + such as identifying parameters, exceptions raised, etc.; such usage + is beyond the scope of this PEP. + + A modified RFC 2822 syntax is used, with a colon *before* as well as + *after* the field name. Field bodies are more versatile as well; + they may contain multiple field bodies (even nested field lists). + For example:: + + :Date: 2002-03-22 + :Version: 1 + :Authors: + - Me + - Myself + - I + + Standard RFC 2822 header syntax cannot be used for this construct + because it is ambiguous. 
A word followed by a colon at the + beginning of a line is common in written text. + +- Markup extensibility: directives and substitutions. + + Directives are used as an extension mechanism for reStructuredText, + a way of adding support for new block-level constructs without + adding new syntax. Directives for images, admonitions (note, + caution, etc.), and tables of contents generation (among others) + have been implemented. For example, here's how to place an image:: + + .. image:: mylogo.png + + Substitution definitions allow the power and flexibility of + block-level directives to be shared by inline text. For example:: + + The |biohazard| symbol must be used on containers used to + dispose of medical waste. + + .. |biohazard| image:: biohazard.png + +- Section structure markup. + + Section headers in reStructuredText use adornment via underlines + (and possibly overlines) rather than indentation. For example:: + + This is a Section Title + ======================= + + This is a Subsection Title + -------------------------- + + This paragraph is in the subsection. + + This is Another Section Title + ============================= + + This paragraph is in the second section. + + +Questions & Answers +=================== + +1. Is reStructuredText rich enough? + + Yes, it is for most people. If it lacks some construct that is + required for a specific application, it can be added via the + directive mechanism. If a useful and common construct has been + overlooked and a suitably readable syntax can be found, it can be + added to the specification and parser. + +2. Is reStructuredText *too* rich? + + For specific applications or individuals, perhaps. In general, no. + + Since the very beginning, whenever a docstring markup syntax has + been proposed on the Doc-SIG_, someone has complained about the + lack of support for some construct or other. 
The reply was often + something like, "These are docstrings we're talking about, and + docstrings shouldn't have complex markup." The problem is that a + construct that seems superfluous to one person may be absolutely + essential to another. + + reStructuredText takes the opposite approach: it provides a rich + set of implicit markup constructs (plus a generic extension + mechanism for explicit markup), allowing for all kinds of + documents. If the set of constructs is too rich for a particular + application, the unused constructs can either be removed from the + parser (via application-specific overrides) or simply omitted by + convention. + +3. Why not use indentation for section structure, like StructuredText + does? Isn't it more "Pythonic"? + + Guido van Rossum wrote the following in a 2001-06-13 Doc-SIG post: + + I still think that using indentation to indicate sectioning is + wrong. If you look at how real books and other print + publications are laid out, you'll notice that indentation is + used frequently, but mostly at the intra-section level. + Indentation can be used to offset lists, tables, quotations, + examples, and the like. (The argument that docstrings are + different because they are input for a text formatter is wrong: + the whole point is that they are also readable without + processing.) + + I reject the argument that using indentation is Pythonic: text + is not code, and different traditions and conventions hold. + People have been presenting text for readability for over 30 + centuries. Let's not innovate needlessly. + + See `Section Structure via Indentation`__ in `Problems With + StructuredText`_ for further elaboration. + + __ https://docutils.sourceforge.io/docs/dev/rst/problems.html + #section-structure-via-indentation + +4. Why use reStructuredText for PEPs? What's wrong with the existing + standard? 
+ + The existing standard for PEPs is very limited in terms of general + expressibility, and referencing is especially lacking for such a + reference-rich document type. PEPs are currently converted into + HTML, but the results (mostly monospaced text) are less than + attractive, and most of the value-added potential of HTML + (especially inline hyperlinks) is untapped. + + Making reStructuredText a standard markup for PEPs will enable much + richer expression, including support for section structure, inline + markup, graphics, and tables. In several PEPs there are ASCII + graphics diagrams, which are all that plaintext documents can + support. Since PEPs are made available in HTML form, the ability + to include proper diagrams would be immediately useful. + + Current PEP practices allow for reference markers in the form "[1]" + in the text, and the footnotes/references themselves are listed in + a section toward the end of the document. There is currently no + hyperlinking between the reference marker and the + footnote/reference itself (it would be possible to add this to + pep2html.py, but the "markup" as it stands is ambiguous and + mistakes would be inevitable). A PEP with many references (such as + this one ;-) requires a lot of flipping back and forth. When + revising a PEP, often new references are added or unused references + deleted. It is painful to renumber the references, since it has to + be done in two places and can have a cascading effect (insert a + single new reference 1, and every other reference has to be + renumbered; always adding new references to the end is suboptimal). + It is easy for references to go out of sync. + + PEPs use references for two purposes: simple URL references and + footnotes. reStructuredText differentiates between the two. A PEP + might contain references like this:: + + Abstract + + This PEP proposes adding frungible doodads [1] to the core. + It extends PEP 9876 [2] via the BCA [3] mechanism. + + ... 
+ + References and Footnotes + + [1] http://www.example.org/ + + [2] PEP 9876, Let's Hope We Never Get Here + http://www.python.org/peps/pep-9876.html + + [3] "Bogus Complexity Addition" + + Reference 1 is a simple URL reference. Reference 2 is a footnote + containing text and a URL. Reference 3 is a footnote containing + text only. Rewritten using reStructuredText, this PEP could look + like this:: + + Abstract + ======== + + This PEP proposes adding `frungible doodads`_ to the core. It + extends PEP 9876 [#pep9876]_ via the BCA [#]_ mechanism. + + ... + + References & Footnotes + ====================== + + .. _frungible doodads: http://www.example.org/ + + .. [#pep9876] PEP 9876, Let's Hope We Never Get Here + + .. [#] "Bogus Complexity Addition" + + URLs and footnotes can be defined close to their references if + desired, making them easier to read in the source text, and making + the PEPs easier to revise. The "References and Footnotes" section + can be auto-generated with a document tree transform. Footnotes + from throughout the PEP would be gathered and displayed under a + standard header. If URL references should likewise be written out + explicitly (in citation form), another tree transform could be + used. + + URL references can be named ("frungible doodads"), and can be + referenced from multiple places in the document without additional + definitions. When converted to HTML, references will be replaced + with inline hyperlinks (HTML <a> tags). The two footnotes are + automatically numbered, so they will always stay in sync. The + first footnote also contains an internal reference name, "pep9876", + so it's easier to see the connection between reference and footnote + in the source text. Named footnotes can be referenced multiple + times, maintaining consistent numbering. + + The "#pep9876" footnote could also be written in the form of a + citation:: + + It extends PEP 9876 [PEP9876]_ ... + + .. 
[PEP9876] PEP 9876, Let's Hope We Never Get Here + + Footnotes are numbered, whereas citations use text for their + references. + +5. Wouldn't it be better to keep the docstring and PEP proposals + separate? + + The PEP markup proposal may be removed if it is deemed that there + is no need for PEP markup, or it could be made into a separate PEP. + If accepted, PEP 1, PEP Purpose and Guidelines [#PEP-1]_, and PEP + 9, Sample PEP Template [#PEP-9]_ will be updated. + + It seems natural to adopt a single consistent markup standard for + all uses of structured plaintext in Python, and to propose it all + in one place. + +6. The existing pep2html.py script converts the existing PEP format to + HTML. How will the new-format PEPs be converted to HTML? + + A new version of pep2html.py with integrated reStructuredText + parsing has been completed. The Docutils project supports PEPs + with a "PEP Reader" component, including all functionality + currently in pep2html.py (auto-recognition of PEP & RFC references, + email masking, etc.). + +7. Who's going to convert the existing PEPs to reStructuredText? + + PEP authors or volunteers may convert existing PEPs if they like, + but there is no requirement to do so. The reStructuredText-based + PEPs will coexist with the old PEP standard. The pep2html.py + mentioned in answer 6 processes both old and new standards. + +8. Why use reStructuredText for README and other ancillary files? + + The reasoning given for PEPs in answer 4 above also applies to + README and other ancillary files. By adopting a standard markup, + these files can be converted to attractive cross-referenced HTML + and put up on python.org. Developers of other projects can also + take advantage of this facility for their own documentation. + +9. Won't the superficial similarity to existing markup conventions + cause problems, and result in people writing invalid markup (and + not noticing, because the plaintext looks natural)? 
How forgiving + is reStructuredText of "not quite right" markup? + + There will be some mis-steps, as there would be when moving from + one programming language to another. As with any language, + proficiency grows with experience. Luckily, reStructuredText is a + very little language indeed. + + As with any syntax, there is the possibility of syntax errors. It + is expected that a user will run the processing system over their + input and check the output for correctness. + + In a strict sense, the reStructuredText parser is very unforgiving + (as it should be; "In the face of ambiguity, refuse the temptation + to guess" [#Zen]_ applies to parsing markup as well as computer + languages). Here's design goal 3 from `An Introduction to + reStructuredText`_: + + Unambiguous. The rules for markup must not be open for + interpretation. For any given input, there should be one and + only one possible output (including error output). + + While unforgiving, at the same time the parser does try to be + helpful by producing useful diagnostic output ("system messages"). + The parser reports problems, indicating their level of severity + (from least to most: debug, info, warning, error, severe). The + user or the client software can decide on reporting thresholds; + they can ignore low-level problems or cause high-level problems to + bring processing to an immediate halt. Problems are reported + during the parse as well as included in the output, often with + two-way links between the source of the problem and the system + message explaining it. + +10. Will the docstrings in the Python standard library modules be + converted to reStructuredText? + + No. Python's library reference documentation is maintained + separately from the source. Docstrings in the Python standard + library should not try to duplicate the library reference + documentation. 
The current policy for docstrings in the Python + standard library is that they should be no more than concise + hints, simple and markup-free (although many *do* contain ad-hoc + implicit markup). + +11. I want to write all my strings in Unicode. Will anything + break? + + The parser fully supports Unicode. Docutils supports arbitrary + input and output encodings. + +12. Why does the community need a new structured text design? + + The existing structured text designs are deficient, for the + reasons given in "Rationale" above. reStructuredText aims to be a + complete markup syntax, within the limitations of the "readable + plaintext" medium. + +13. What is wrong with existing documentation methodologies? + + What existing methodologies? For Python docstrings, there is + **no** official standard markup format, let alone a documentation + methodology akin to JavaDoc. The question of methodology is at a + much higher level than syntax (which this PEP addresses). It is + potentially much more controversial and difficult to resolve, and + is intentionally left out of this discussion. + + +References & Footnotes +====================== + +.. [#PEP-1] PEP 1, PEP Guidelines, Warsaw, Hylton + (http://www.python.org/peps/pep-0001.html) + +.. [#PEP-9] PEP 9, Sample PEP Template, Warsaw + (http://www.python.org/peps/pep-0009.html) + +.. [#Zen] From `The Zen of Python (by Tim Peters)`__ (or just + "``import this``" in Python) + +__ http://www.python.org/doc/Humor.html#zen + +.. [#PEP-216] PEP 216, Docstring Format, Zadka + (http://www.python.org/peps/pep-0216.html) + +.. _reStructuredText markup: https://docutils.sourceforge.io/rst.html + +.. _Doc-SIG: http://www.python.org/sigs/doc-sig/ + +.. _XML: http://www.w3.org/XML/ + +.. _SGML: http://www.oasis-open.org/cover/general.html + +.. _DocBook: http://docbook.org/tdg/en/html/docbook.html + +.. _HTML: http://www.w3.org/MarkUp/ + +.. _XHTML: http://www.w3.org/MarkUp/#xhtml1 + +.. _TeX: http://www.tug.org/interest.html + +.. 
_Perl POD: http://perldoc.perl.org/perlpod.html + +.. _JavaDoc: http://java.sun.com/j2se/javadoc/ + +.. _Setext: https://docutils.sourceforge.io/mirror/setext.html + +.. _StructuredText: + http://www.zope.org/DevHome/Members/jim/StructuredTextWiki/FrontPage + +.. _A ReStructuredText Primer: + https://docutils.sourceforge.io/docs/user/rst/quickstart.html + +.. _Quick reStructuredText: + https://docutils.sourceforge.io/docs/user/rst/quickref.html + +.. _An Introduction to reStructuredText: + https://docutils.sourceforge.io/docs/ref/rst/introduction.html + +.. _reStructuredText Markup Specification: + https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html + +.. _reStructuredText Directives: + https://docutils.sourceforge.io/docs/ref/rst/directives.html + +.. _Problems with StructuredText: + https://docutils.sourceforge.io/docs/dev/rst/problems.html + +.. _A Record of reStructuredText Syntax Alternatives: + https://docutils.sourceforge.io/docs/dev/rst/alternatives.html + +.. _Docutils: https://docutils.sourceforge.io/ + + +Copyright +========= + +This document has been placed in the public domain. + + +Acknowledgements +================ + +Some text is borrowed from PEP 216, Docstring Format [#PEP-216]_, by +Moshe Zadka. + +Special thanks to all members past & present of the Python Doc-SIG_. + + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/doctree.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/doctree.txt new file mode 100644 index 00000000..a226da82 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/doctree.txt @@ -0,0 +1,5418 @@ +============================ + The Docutils Document Tree +============================ + +A Guide to the Docutils DTD +*************************** + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + + +.. contents:: :depth: 1 + + +This document describes the XML data structure of Docutils_ documents: +the relationships and semantics of elements and attributes. The +Docutils document structure is formally defined by the `Docutils +Generic DTD`_ XML document type definition, docutils.dtd_, which is +the definitive source for details of element structural relationships. + +This document does not discuss implementation details. Those can be +found in internal documentation (docstrings) for the +``docutils.nodes`` module, where the document tree data structure is +implemented in a class library. + +The reader is assumed to have some familiarity with XML or SGML, and +an understanding of the data structure meaning of "tree". For a list +of introductory articles, see `Introducing the Extensible Markup +Language (XML)`_. + +The reStructuredText_ markup is used for illustrative examples +throughout this document. For a gentle introduction, see `A +ReStructuredText Primer`_. For complete technical details, see the +`reStructuredText Markup Specification`_. + +.. _Docutils: https://docutils.sourceforge.io/ +.. _Docutils Generic DTD: +.. _Docutils DTD: +.. _docutils.dtd: docutils.dtd +.. 
_Introducing the Extensible Markup Language (XML): + http://xml.coverpages.org/xmlIntro.html +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _A ReStructuredText Primer: ../user/rst/quickstart.html +.. _reStructuredText Markup Specification: rst/restructuredtext.html + + +------------------- + Element Hierarchy +------------------- + +.. contents:: :local: + +Below is a simplified diagram of the hierarchy of elements in the +Docutils document tree structure. An element may contain any other +elements immediately below it in the diagram. Notes are written in +square brackets. Element types in parentheses indicate recursive or +one-to-many relationships; sections may contain (sub)sections, tables +contain further body elements, etc. :: + + +--------------------------------------------------------------------+ + | document [may begin with a title, subtitle, decoration, docinfo] | + | +--------------------------------------+ + | | sections [each begins with a title] | + +-----------------------------+-------------------------+------------+ + | [body elements:] | (sections) | + | | - literal | - lists | | - hyperlink +------------+ + | | blocks | - tables | | targets | + | para- | - doctest | - block | foot- | - sub. defs | + | graphs | blocks | quotes | notes | - comments | + +---------+-----------+----------+-------+--------------+ + | [text]+ | [text] | (body elements) | [text] | + | (inline +-----------+------------------+--------------+ + | markup) | + +---------+ + +The Docutils document model uses a simple, recursive model for section +structure. A document_ node may contain body elements and section_ +elements. Sections in turn may contain body elements and sections. +The level (depth) of a section element is determined from its physical +nesting level; unlike other document models (``<h1>`` in HTML_, +``<sect1>`` in DocBook_, ``<div1>`` in XMLSpec_) the level is not +incorporated into the element name. 
+ +The Docutils document model uses strict element content models. Every +element has a unique structure and semantics, but elements may be +classified into general categories (below). Only elements which are +meant to directly contain text data have a mixed content model, where +text data and inline elements may be intermixed. This is unlike the +much looser HTML_ document model, where paragraphs and text data may +occur at the same level. + +.. _HTML: https://www.w3.org/TR/html52/ +.. _DocBook: https://tdg.docbook.org/tdg/5.1/ +.. _XMLSpec: https://www.w3.org/XML/1998/06/xmlspec-report.htm + + +Structural Elements +=================== + +Structural elements may only contain child elements; they do not +directly contain text data. Structural elements may contain body +elements or further structural elements. Structural elements can only +be child elements of other structural elements. + +Category members: document_, section_, topic_, sidebar_ + + +Structural Subelements +---------------------- + +Structural subelements are child elements of structural elements. +Simple structural subelements (title_, subtitle_) contain text +data; the others are compound and do not directly contain text data. + +Category members: title_, subtitle_, decoration_, docinfo_, meta_, +transition_ + + +Bibliographic Elements +`````````````````````` + +The docinfo_ element is an optional child of document_. It groups +bibliographic elements together. All bibliographic elements except +authors_ and field_ contain text data. authors_ contains further +bibliographic elements (most notably author_). field_ contains +field_name_ and field_body_ body subelements. + +Category members: address_, author_, authors_, contact_, copyright_, +date_, field_, organization_, revision_, status_, version_ + + +Decorative Elements +``````````````````` + +The decoration_ element is also an optional child of document_. It +groups together elements used to generate page headers and footers.
+ +Category members: footer_, header_ + + +Body Elements +============= + +Body elements are contained within structural elements and compound +body elements. There are two subcategories of body elements: simple +and compound. + +Category members: admonition_, attention_, block_quote_, bullet_list_, +caution_, citation_, comment_, compound_, container_, danger_, +definition_list_, doctest_block_, enumerated_list_, error_, +field_list_, figure_, footnote_, hint_, image_, important_, +line_block_, literal_block_, note_, option_list_, paragraph_, +pending_, raw_, rubric_, substitution_definition_, system_message_, +table_, target_, tip_, warning_ + + +Simple Body Elements +-------------------- + +Simple body elements are empty or directly contain text data. Those +that contain text data may also contain inline elements. Such +elements therefore have a "mixed content model". + +Category members: comment_, doctest_block_, image_, literal_block_, +math_block_, paragraph_, pending_, raw_, rubric_, substitution_definition_, +target_ + + +Compound Body Elements +---------------------- + +Compound body elements contain local substructure (body subelements) +and further body elements. They do not directly contain text data. + +Category members: admonition_, attention_, block_quote_, bullet_list_, +caution_, citation_, compound_, container_, danger_, definition_list_, +enumerated_list_, error_, field_list_, figure_, footnote_, hint_, +important_, line_block_, note_, option_list_, system_message_, table_, +tip_, warning_ + + +Body Subelements +```````````````` + +Compound body elements contain specific subelements (e.g. bullet_list_ +contains list_item_). Subelements may themselves be compound elements +(containing further child elements, like field_) or simple data +elements (containing text data, like field_name_). These subelements +always occur within specific parent elements, never at the body +element level (beside paragraphs, etc.).
+ +Category members (simple): attribution_, caption_, classifier_, +colspec_, field_name_, label_, line_, option_argument_, +option_string_, term_ + +Category members (compound): definition_, definition_list_item_, +description_, entry_, field_, field_body_, legend_, list_item_, +option_, option_group_, option_list_item_, row_, tbody_, tgroup_, +thead_ + + +Inline Elements +=============== + +Inline elements directly contain text data, and may also contain +further inline elements. Inline elements are contained within simple +body elements. Most inline elements have a "mixed content model". + +Category members: abbreviation_, acronym_, citation_reference_, +emphasis_, footnote_reference_, generated_, image_, inline_, literal_, +math_, problematic_, reference_, strong_, subscript_, +substitution_reference_, superscript_, target_, title_reference_, raw_ + + +------------------- + Element Reference +------------------- + +.. contents:: :local: + :depth: 1 + +Each element in the DTD (document type definition) is described in its +own section below. Each section contains an introduction plus the +following subsections: + +* Details (of element relationships and semantics): + + - Category: One or more references to the element categories in + `Element Hierarchy`_ above. Some elements belong to more than one + category. + + - Analogues: Describes analogous elements in well-known document + models such as HTML_ or DocBook_. Lists similarities and + differences. + + - Processing: Lists formatting or rendering recommendations for the + element. + + - Parents: A list of elements which may contain the element. + + - Children: A list of elements which may occur within the element + followed by the formal XML content model from the `Docutils DTD`_. + + - Attributes: Describes (or refers to descriptions of) the possible + values and semantics of each attribute. + + - Parameter Entities: Lists the parameter entities which directly or + indirectly include the element. 
+ +* Examples: reStructuredText_ examples are shown along with + fragments of the document trees resulting from parsing. + _`Pseudo-XML` is used for the results of parsing and processing. + Pseudo-XML is a representation of XML where nesting is indicated by + indentation and end-tags are not shown. Some of the precision of + real XML is given up in exchange for easier readability. For + example, the following are equivalent: + + - Real XML:: + + <document> + <section ids="a-title" names="a title"> + <title>A Title</title> + <paragraph>A paragraph.</paragraph> + </section>
+ </document>
+ + - Pseudo-XML:: + + <document>
+ <section ids="a-title" names="a title"> + <title> + A Title + <paragraph> + A paragraph. + +-------------------- + +Many of the element reference sections below are marked "_`to be +completed`". Please help complete this document by contributing to +its writing. + + +``abbreviation`` +================ + +The ``abbreviation`` element is an inline element used to represent an +abbreviation being used in the document. An example of an abbreviation is 'St' +being used instead of 'Street'. + + +Details +------- + +:Category: + `Inline Elements`_ + +:Analogues: + ``abbreviation`` is analogous to the HTML "abbr" element. + +:Parents: + All elements employing the `%inline.elements;`_ parameter entity in their + content models may contain ``abbreviation``. + +:Children: + ``abbreviation`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``abbreviation`` element contains only the `common attributes`_. + + +Examples +-------- + +The ``abbreviation`` element is not exposed in default restructured text. It +can only be accessed through custom roles. + +Pseudo-XML_ example from a custom `:abbr:` role:: + + <paragraph> + <abbreviation explanation="Street"> + St + is a common abbreviation for "street". + + +``acronym`` +=========== + +`To be completed`_. + + +``address`` +=========== + +The ``address`` element holds the surface mailing address information +for the author (individual or group) of the document, or a third-party +contact address. Its structure is identical to that of the +literal_block_ element: whitespace is significant, especially +newlines. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``address`` is analogous to the DocBook "address" element. + +:Processing: + As with the literal_block_ element, newlines and other whitespace + is significant and must be preserved. However, a monospaced + typeface need not be used. + + See also docinfo_.
+ +:Parents: + The following elements may contain ``address``: docinfo_, authors_ + +:Children: + ``address`` elements contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``address`` element contains the `common attributes`_ plus + `xml:space`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``address``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Address: 123 Example Ave. + Example, EX + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <address> + 123 Example Ave. + Example, EX + +See docinfo_ for a more complete example, including processing +context. + + +``admonition`` +============== + +This element is a generic, titled admonition. Also see the specific +admonition elements Docutils offers (in alphabetical order): caution_, +danger_, error_, hint_, important_, note_, tip_, warning_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``admonition`` has no direct analogues in common DTDs. It can be + emulated with primitives and type effects. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``admonition``. + +:Children: + ``admonition`` elements begin with a title_ and may contain one or + more `body elements`_. :: + + (title_, (`%body.elements;`_)+) + +:Attributes: + The ``admonition`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``admonition``. The `%structure.model;`_ parameter entity + indirectly includes ``admonition``. + + +Examples +-------- + +reStructuredText source:: + + .. 
admonition:: And, by the way... + + You can make up your own admonition too. + +Pseudo-XML_ fragment from simple parsing:: + + <admonition class="admonition-and-by-the-way"> + <title> + And, by the way... + <paragraph> + You can make up your own admonition too. + + +``attention`` +============= + +The ``attention`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): caution_, danger_, error_, +hint_, important_, note_, tip_, warning_, and the generic admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``attention`` has no direct analogues in common DTDs. It can be + emulated with primitives and type effects. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Attention!" (or similar). + + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``attention``. + +:Children: + ``attention`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``attention`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``attention``. The `%structure.model;`_ parameter entity + indirectly includes ``attention``. + + +Examples +-------- + +reStructuredText source:: + + .. Attention:: All your base are belong to us. + +Pseudo-XML_ fragment from simple parsing:: + + <attention> + <paragraph> + All your base are belong to us. + + +``attribution`` +=============== + +`To be completed`_. + + +``author`` +========== + +The ``author`` element holds the name of the author of the document. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``author`` is analogous to the DocBook "author" element. + +:Processing: + See docinfo_. 
+ +:Parents: + The following elements may contain ``author``: docinfo_, authors_ + +:Children: + ``author`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``author`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``author``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Author: J. Random Hacker + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <author> + J. Random Hacker + +See docinfo_ for a more complete example, including processing +context. + + +``authors`` +=========== + +The ``authors`` element is a container for author information for +documents with multiple authors. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``authors`` is analogous to the DocBook "authors" element. + +:Processing: + See docinfo_. + +:Parents: + Only the docinfo_ element contains ``authors``. + +:Children: + ``authors`` elements may contain the following elements: author_, + organization_, address_, contact_:: + + ((author, organization?, address?, contact?)+) + +:Attributes: + The ``authors`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``authors``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Authors: J. Random Hacker; Jane Doe + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <authors> + <author> + J. 
Random Hacker + <author> + Jane Doe + +In reStructuredText, multiple authors' names are separated with +semicolons (";") or commas (","); semicolons take precedence. There +is currently no way to represent the author's organization, address, +or contact in a reStructuredText "Authors" field. + +See docinfo_ for a more complete example, including processing +context. + + +``block_quote`` +=============== + +The ``block_quote`` element is used for quotations set off from the +main text (standalone). + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``block_quote`` is analogous to the "blockquote" element in both + HTML and DocBook. + +:Processing: + ``block_quote`` elements serve to set their contents off from the + main text, typically with indentation and/or other decoration. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``block_quote``. + +:Children: + ``block_quote`` elements contain `body elements`_ followed by an + optional attribution_ element. + + .. parsed-literal:: + + ((`%body.elements;`_)+, attribution?) + +:Attributes: + The ``block_quote`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``block_quote``. The `%structure.model;`_ parameter entity + indirectly includes ``block_quote``. + + +Examples +-------- + +reStructuredText source:: + + As a great paleontologist once said, + + This theory, that is mine, is mine. + + -- Anne Elk (Miss) + +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + As a great paleontologist once said, + <block_quote> + <paragraph> + This theory, that is mine, is mine. + <attribution> + Anne Elk (Miss) + + +``bullet_list`` +=============== + +The ``bullet_list`` element contains list_item_ elements which are +uniformly marked with bullets.
Bullets are typically simple dingbats +(symbols) such as circles and squares. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``bullet_list`` is analogous to the HTML "ul" element and to the + DocBook "itemizedlist" element. HTML's "ul" is short for + "unordered list", which we consider to be a misnomer. "Unordered" + implies that the list items may be randomly rearranged without + affecting the meaning of the list. Bullet lists *are* often + ordered; the ordering is simply left implicit. + +:Processing: + Each list item should begin a new vertical block, prefaced by a + bullet/dingbat. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``bullet_list``. + +:Children: + ``bullet_list`` elements contain one or more list_item_ elements:: + + (list_item_+) + +:Attributes: + The ``bullet_list`` element contains the `common attributes`_ + plus bullet_. + + ``bullet`` is used to record the style of bullet from the input + data. In documents processed from reStructuredText_, it contains + one of "-", "+", or "*". It may be ignored in processing. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``bullet_list``. The `%structure.model;`_ parameter entity + indirectly includes ``bullet_list``. + + +Examples +-------- + +reStructuredText_ source:: + + - Item 1, paragraph 1. + + Item 1, paragraph 2. + + - Item 2. + +Pseudo-XML_ fragment from simple parsing:: + + <bullet_list bullet="-"> + <list_item> + <paragraph> + Item 1, paragraph 1. + <paragraph> + Item 1, paragraph 2. + <list_item> + <paragraph> + Item 2. + +See list_item_ for another example. + + +``caption`` +=========== + +`To be completed`_. + + +``caution`` +=========== + +The ``caution`` element is an admonition, a distinctive and +self-contained notice. 
Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, danger_, error_, +hint_, important_, note_, tip_, warning_, and the generic admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``caution`` is analogous to the `DocBook "caution"`_ element. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Caution" (or similar). + +.. _DocBook "caution": https://tdg.docbook.org/tdg/5.1/caution.html + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``caution``. + +:Children: + ``caution`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``caution`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``caution``. The `%structure.model;`_ parameter entity + indirectly includes ``caution``. + + +Examples +-------- + +reStructuredText source:: + + .. Caution:: Don't take any wooden nickels. + +Pseudo-XML_ fragment from simple parsing:: + + <caution> + <paragraph> + Don't take any wooden nickels. + + +``citation`` +============ + +`To be completed`_. + + +``citation_reference`` +====================== + +`To be completed`_. + + +``classifier`` +============== + +The ``classifier`` element contains the classification or type of the +term_ being defined in a definition_list_. For example, it can be +used to indicate the type of a variable. + + +Details +------- + +:Category: + `Body Subelements`_ (simple) + +:Analogues: + ``classifier`` has no direct analogues in common DTDs. It can be + emulated with primitives or type effects. + +:Processing: + See definition_list_item_. + +:Parents: + Only the definition_list_item_ element contains ``classifier``. 
+ +:Children: + ``classifier`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``classifier`` element contains only the `common attributes`_. + + +Examples +-------- + +Here is a hypothetical data dictionary. reStructuredText_ source:: + + name : string + Customer name. + i : int + Temporary index variable. + +Pseudo-XML_ fragment from simple parsing:: + + <definition_list> + <definition_list_item> + <term> + name + <classifier> + string + <definition> + <paragraph> + Customer name. + <definition_list_item> + <term> + i + <classifier> + int + <definition> + <paragraph> + Temporary index variable. + + +``colspec`` +=========== + +Specifications for a column in a table_. + + +Details +------- + +:Category: + `Body Subelements`_ (simple) + +:Analogues: + ``colspec`` is based on the [exchange-table-model]_ and + analogous to the DocBook "colspec" element. + +:Processing: + The ``colspec`` element contains layout information for the parent + table_. + +:Parents: + Only the tgroup_ element contains ``colspec``. + +:Children: + ``colspec`` is an empty element and has no children. + +:Attributes: + The ``colspec`` element contains the optional "colnum", "colname", + "colwidth", "colsep", "rowsep", "align", "char", and "charoff" + attributes defined in the exchange-table-model_ plus the + `common attributes`_ and `stub`_. + + Docutils uses only colwidth_ and stub_. + + .. attention:: + + In contrast to the definition in the exchange-table-model_, + unitless values of the "colwidth" are interpreted as proportional + values, not fixed values with unit "pt". + + .. The reference implementation `html4css2` converts column + widths values to percentages. + + Future versions of Docutils may use the standard form + ``number*``, e.g., “5*” for 5 times the proportion. + +Examples +-------- + +See table_. + + +``comment`` +=========== + +`To be completed`_. + + +``compound`` +============ + +`To be completed`_. 
+ + +``contact`` +=========== + +The ``contact`` element holds contact information for the author +(individual or group) of the document, or a third-party contact. It +is typically used for an email or web address. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``contact`` is analogous to the DocBook "email" element. The HTML + "address" element serves a similar purpose. + +:Processing: + See docinfo_. + +:Parents: + The following elements may contain ``contact``: docinfo_, authors_ + +:Children: + ``contact`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``contact`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``contact``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Contact: jrh@example.com + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <contact> + <reference refuri="mailto:jrh@example.com"> + jrh@example.com + +See docinfo_ for a more complete example, including processing +context. + + +``container`` +============= + +`To be completed`_. + + +``copyright`` +============= + +The ``copyright`` element contains the document's copyright statement. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``copyright`` is analogous to the DocBook "copyright" element. + +:Processing: + See docinfo_. + +:Parents: + Only the docinfo_ element contains ``copyright``. + +:Children: + ``copyright`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``copyright`` element contains only the `common attributes`_. 
+ +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``copyright``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Copyright: This document has been placed in the public domain. + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <copyright> + This document has been placed in the public domain. + +See docinfo_ for a more complete example, including processing +context. + + +``danger`` +========== + +The ``danger`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, error_, +hint_, important_, note_, tip_, warning_, and the generic admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``danger`` has no direct analogues in common DTDs. It can be + emulated with primitives and type effects. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "!DANGER!" (or similar). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``danger``. + +:Children: + ``danger`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``danger`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``danger``. The `%structure.model;`_ parameter entity + indirectly includes ``danger``. + + +Examples +-------- + +reStructuredText source:: + + .. DANGER:: Mad scientist at work! + +Pseudo-XML_ fragment from simple parsing:: + + <danger> + <paragraph> + Mad scientist at work! 
+ + +``date`` +======== + +The ``date`` element contains the date of publication, release, or +last modification of the document. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``date`` is analogous to the DocBook "date" element. + +:Processing: + Often used with the RCS/CVS keyword "Date". See docinfo_. + +:Parents: + Only the docinfo_ element contains ``date``. + +:Children: + ``date`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``date`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``date``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Date: 2002-08-20 + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <date> + 2002-08-20 + +See docinfo_ for a more complete example, including processing +context. + + +``decoration`` +============== + +The ``decoration`` element is a container for header_ and footer_ +elements and potential future extensions. These elements are used for +notes, time/datestamp, processing information, etc. + + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + There are no direct analogies to ``decoration`` in HTML or in + DocBook. Equivalents are typically constructed from primitives + and/or generated by the processing system. + +:Processing: + See the individual `decorative elements`_. + +:Parents: + Only the document_ element contains ``decoration``. + +:Children: + ``decoration`` elements may contain `decorative elements`_. + + .. parsed-literal:: + + (header_?, footer_?) + +Although the content model doesn't specifically require contents, no +empty ``decoration`` elements are ever created. 
+ +:Attributes: + The ``decoration`` element contains only the `common attributes`_. + + +Examples +-------- + +reStructuredText_ source:: + + A paragraph. + +Complete pseudo-XML_ result after parsing and applying transforms, +assuming that the datestamp command-line option or configuration +setting has been supplied:: + + <document> + <decoration> + <footer> + <paragraph> + Generated on: 2002-08-20. + <paragraph> + A paragraph. + + +``definition`` +============== + +The ``definition`` element is a container for the body elements used +to define a term_ in a definition_list_. + + +Details +------- + +:Category: + `Body Subelements`_ (compound) + +:Analogues: + ``definition`` is analogous to the HTML "dd" element and to the + DocBook "listitem" element (inside a "variablelistentry" element). + +:Processing: + See definition_list_item_. + +:Parents: + Only definition_list_item_ elements contain ``definition``. + +:Children: + ``definition`` elements contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``definition`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the definition_list_, definition_list_item_, and +classifier_ elements. + + +``definition_list`` +=================== + +The ``definition_list`` element contains a list of terms and their +definitions. It can be used for glossaries or dictionaries, to +describe or classify things, for dialogues, or to itemize subtopics +(such as in this reference). + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``definition_list``. + +:Children: + ``definition_list`` elements contain one or more + definition_list_item_ elements. + +:Analogues: + ``definition_list`` is analogous to the HTML "dl" element and to + the DocBook "variablelist" element. 
+ +:Processing: + See definition_list_item_. + +:Attributes: + The ``definition_list`` element contains only the `common + attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``definition_list``. The `%structure.model;`_ parameter entity + indirectly includes ``definition_list``. + + +Examples +-------- + +reStructuredText_ source:: + + Term + Definition. + + Term : classifier + The ' : ' indicates a classifier in + definition list item terms only. + +Pseudo-XML_ fragment from simple parsing:: + + <definition_list> + <definition_list_item> + <term> + Term + <definition> + <paragraph> + Definition. + <definition_list_item> + <term> + Term + <classifier> + classifier + <definition> + <paragraph> + The ' : ' indicates a classifier in + definition list item terms only. + +See definition_list_item_ and classifier_ for further examples. + + +``definition_list_item`` +======================== + +The ``definition_list_item`` element contains a single +term_/definition_ pair (with optional classifier_). + + +Details +------- + +:Category: + `Body Subelements`_ (compound) + +:Analogues: + ``definition_list_item`` is analogous to the DocBook + "variablelistentry" element. + +:Processing: + The optional classifier_ can be rendered differently from the + term_. They should be separated visually, typically by spaces + plus a colon or dash. + +:Parents: + Only the definition_list_ element contains + ``definition_list_item``. + +:Children: + ``definition_list_item`` elements each contain a single term_, + an optional classifier_, and a definition_:: + + (term, classifier?, definition) + +:Attributes: + The ``definition_list_item`` element contains only the `common + attributes`_. + + +Examples +-------- + +reStructuredText_ source:: + + Tyrannosaurus Rex : carnivore + Big and scary; the "Tyrant King". 
+ + Brontosaurus : herbivore + All brontosauruses are thin at one end, + much much thicker in the middle + and then thin again at the far end. + + -- Anne Elk (Miss) + +Pseudo-XML_ fragment from simple parsing:: + + <definition_list> + <definition_list_item> + <term> + Tyrannosaurus Rex + <classifier> + carnivore + <definition> + <paragraph> + Big and scary; the "Tyrant King". + <definition_list_item> + <term> + Brontosaurus + <classifier> + herbivore + <definition> + <paragraph> + All brontosauruses are thin at one end, + much much thicker in the middle + and then thin again at the far end. + <paragraph> + -- Anne Elk (Miss) + +See definition_list_ and classifier_ for further examples. + + +``description`` +=============== + +The ``description`` element contains body elements, describing the +purpose or effect of a command-line option or group of options. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``description`` has no direct analogues in common DTDs. + +:Processing: + See option_list_. + +:Parents: + Only the option_list_item_ element contains ``description``. + +:Children: + ``description`` elements may contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``description`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the option_list_ element. + + +``docinfo`` +=========== + +The ``docinfo`` element is a container for displayed document bibliographic +data, or meta-data (data about the document). It corresponds to the +front matter of a book, such as the title page and copyright page. + +See also the meta_ element (for "hidden" meta-data). + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + ``docinfo`` is analogous to DocBook "info" elements ("bookinfo" + etc.). There are no directly analogous HTML elements; the "meta" + element carries some of the same information, albeit invisibly. 
+ +:Processing: + The ``docinfo`` element may be rendered as a two-column table or + in other styles. It may even be invisible or omitted from the + processed output. Meta-data may be extracted from ``docinfo`` + children; for example, HTML ``<meta>`` tags may be constructed. + + When Docutils_ transforms a reStructuredText_ field_list_ into a + ``docinfo`` element (see the examples below), RCS/CVS keywords are + normally stripped from simple (one paragraph) field bodies. For + complete details, please see `RCS Keywords`_ in the + `reStructuredText Markup Specification`_. + + .. _RCS Keywords: rst/restructuredtext.html#rcs-keywords + +:Parents: + Only the document_ element contains ``docinfo``. + +:Children: + ``docinfo`` elements contain `bibliographic elements`_. + + .. parsed-literal:: + + (`%bibliographic.elements;`_)+ + +:Attributes: + The ``docinfo`` element contains only the `common attributes`_. + + +Examples +-------- + +Docinfo is represented in reStructuredText_ by a field_list_ in a +bibliographic context: the first non-comment element of a document_, +after any document title_/subtitle_. The field list is transformed +into a ``docinfo`` element and its children by a transform. Source:: + + Docinfo Example + =============== + + :Author: J. Random Hacker + :Contact: jrh@example.com + :Date: 2002-08-18 + :Status: Work In Progress + :Version: 1 + :Filename: $RCSfile$ + :Copyright: This document has been placed in the public domain. + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="docinfo-example" names="docinfo example"> + <title> + Docinfo Example + <docinfo> + <author> + J. Random Hacker + <contact> + <reference refuri="mailto:jrh@example.com"> + jrh@example.com + <date> + 2002-08-18 + <status> + Work In Progress + <version> + 1 + <field> + <field_name> + Filename + <field_body> + <paragraph> + doctree.txt + <copyright> + This document has been placed in the public domain. 
+ +Note that "Filename" is a non-standard ``docinfo`` field, so becomes a +generic ``field`` element. Also note that the "RCSfile" keyword +syntax has been stripped from the "Filename" data. + +See field_list_ for an example in a non-bibliographic context. Also +see the individual examples for the various `bibliographic elements`_. + + +``doctest_block`` +================= + +The ``doctest_block`` element is a Python-specific variant of +literal_block_. It is a block of text where line breaks and +whitespace are significant and must be preserved. ``doctest_block`` +elements are used for interactive Python interpreter sessions, which +are distinguished by their input prompt: ``>>>``. They are meant to +illustrate usage by example, and provide an elegant and powerful +testing environment via the `doctest module`_ in the Python standard +library. + +.. _doctest module: + https://docs.python.org/3/library/doctest.html + + +Details +------- + +:Category: + `Simple Body Elements`_ + +:Analogues: + ``doctest_block`` is analogous to the HTML "pre" element and to + the DocBook "programlisting" and "screen" elements. + +:Processing: + As with literal_block_, ``doctest_block`` elements are typically + rendered in a monospaced typeface. It is crucial that all + whitespace and line breaks are preserved in the rendered form. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``doctest_block``. + +:Children: + ``doctest_block`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``doctest_block`` element contains the `common attributes`_ + plus `xml:space`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``doctest_block``. The `%structure.model;`_ parameter entity + indirectly includes ``doctest_block``. 
+ + +Examples +-------- + +reStructuredText source:: + + This is an ordinary paragraph. + + >>> print 'this is a Doctest block' + this is a Doctest block + +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + This is an ordinary paragraph. + <doctest_block xml:space="preserve"> + >>> print 'this is a Doctest block' + this is a Doctest block + + +``document`` +============ + +The ``document`` element is the root (topmost) element of the Docutils +document tree. ``document`` is the direct or indirect ancestor of +every other element in the tree. It encloses the entire document +tree. It is the starting point for a document. + + +Details +------- + +:Category: + `Structural Elements`_ + +:Analogues: + ``document`` is analogous to the HTML "html" element and to + several DocBook elements such as "book". + +:Parents: + The ``document`` element has no parents. + +:Children: + ``document`` elements may contain `structural subelements`_, + `structural elements`_, and `body elements`_. + + .. parsed-literal:: + + ( (title_, subtitle_?)?, + decoration_?, + (docinfo_, transition_?)?, + `%structure.model;`_ ) + +Depending on the source of the data and the stage of processing, the +"document" may not initially contain a "title". A document title is +not directly representable in reStructuredText_. Instead, a lone +top-level section may have its title promoted to become the document +title_, and similarly for a lone second-level (sub)section's title to +become the document subtitle_. + +The contents of "decoration_" may be specified in a document, +constructed programmatically, or both. The "docinfo_" may be +transformed from an initial field_list_. + +See the `%structure.model;`_ parameter entity for details of the body +of a ``document``. + +:Attributes: + The ``document`` element contains the `common attributes`_ (ids_, + names_, dupnames_, source_, and classes_), plus an optional + `title attribute`_ which stores the document title metadata. 
+ +Examples +-------- + +reStructuredText_ source:: + + A Title + ======= + + A paragraph. + +Complete pseudo-XML_ result from simple parsing:: + + <document> + <section ids="a-title" names="a title"> + <title> + A Title + <paragraph> + A paragraph. + +After applying transforms, the section title is promoted to become the +document title:: + + <document ids="a-title" names="a title"> + <title> + A Title + <paragraph> + A paragraph. + + +``emphasis`` +============ + +`To be completed`_. + + +``entry`` +========= + +`To be completed`_. + + +``enumerated_list`` +=================== + +The ``enumerated_list`` element contains list_item_ elements which are +uniformly marked with enumerator labels. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``enumerated_list`` is analogous to the HTML "ol" element and to + the DocBook "orderedlist" element. + +:Processing: + Each list item should begin a new vertical block, prefaced by an + enumeration marker (such as "1."). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``enumerated_list``. + +:Children: + ``enumerated_list`` elements contain one or more list_item_ + elements:: + + (list_item+) + +:Attributes: + The ``enumerated_list`` element contains the `common attributes`_ + plus enumtype_, + prefix_, suffix_, and start_. + + ``enumtype`` is used to record the intended enumeration sequence, + one of "arabic" (1, 2, 3, ...), "loweralpha" (a, b, c, ..., z), + "upperalpha" (A, B, C, ..., Z), "lowerroman" (i, ii, iii, iv, ..., + mmmmcmxcix [4999]), or "upperroman" (I, II, III, IV, ..., + MMMMCMXCIX [4999]). + + ``prefix`` stores the formatting characters used before the + enumerator. In documents originating from reStructuredText_ data, + it will contain either "" (empty string) or "(" (left + parenthesis). It may or may not affect processing.
+ + ``suffix`` stores the formatting characters used after the + enumerator. In documents originating from reStructuredText_ data, + it will contain either "." (period) or ")" (right parenthesis). + Depending on the capabilities of the output format, this attribute + may or may not affect processing. + + ``start`` contains the ordinal value of the first item in the + list, in decimal. For lists beginning at value 1 ("1", "a", "A", + "i", or "I"), this attribute may be omitted. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``enumerated_list``. The `%structure.model;`_ parameter entity + indirectly includes ``enumerated_list``. + + +Examples +-------- + +reStructuredText_ source:: + + 1. Item 1. + + (A) Item A. + (B) Item B. + (C) Item C. + + 2. Item 2. + +Pseudo-XML_ fragment from simple parsing:: + + <enumerated_list enumtype="arabic" prefix="" suffix="."> + <list_item> + <paragraph> + Item 1. + <enumerated_list enumtype="upperalpha" prefix="(" suffix=")"> + <list_item> + <paragraph> + Item A. + <list_item> + <paragraph> + Item B. + <list_item> + <paragraph> + Item C. + <list_item> + <paragraph> + Item 2. + +See list_item_ for another example. + + +``error`` +========= + +The ``error`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, +danger_, hint_, important_, note_, tip_, warning_, and the generic +admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``error`` has no direct analogues in common DTDs. It can be + emulated with primitives and type effects. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Error" (or similar). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``error``. 
+ +:Children: + ``error`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``error`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``error``. The `%structure.model;`_ parameter entity indirectly + includes ``error``. + + +Examples +-------- + +reStructuredText source:: + + .. Error:: Does not compute. + +Pseudo-XML_ fragment from simple parsing:: + + <error> + <paragraph> + Does not compute. + + +``field`` +========= + +The ``field`` element contains a pair of field_name_ and field_body_ +elements. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``field`` has no direct analogues in common DTDs. + +:Processing: + See field_list_. + +:Parents: + The following elements may contain ``field``: docinfo_, + field_list_ + +:Children: + Each ``field`` element contains one field_name_ and one + field_body_ element:: + + (field_name, field_body) + +:Attributes: + The ``field`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``field``. + + +Examples +-------- + +See the examples for the field_list_ and docinfo_ elements. + + +``field_body`` +============== + +The ``field_body`` element contains body elements. It is analogous to +a database field's data. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``field_body`` has no direct analogues in common DTDs. + +:Processing: + See field_list_. + +:Parents: + Only the field_ element contains ``field_body``. + +:Children: + ``field_body`` elements may contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)* + +:Attributes: + The ``field_body`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the field_list_ and docinfo_ elements. 
+ + +``field_list`` +============== + +The ``field_list`` element contains two-column table-like structures +resembling database records (label & data pairs). Field lists are +often meant for further processing. In reStructuredText_, field lists +are used to represent bibliographic fields (contents of the docinfo_ +element) and `directive options`_. + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``field_list`` has no direct analogues in common DTDs. It can be + emulated with primitives such as tables. + +:Processing: + A ``field_list`` is typically rendered as a two-column list, where + the first column contains "labels" (usually with a colon suffix). + However, field lists are often used for extension syntax or + special processing. Such structures do not survive as field lists + to be rendered. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``field_list``. + +:Children: + ``field_list`` elements contain one or more field_ elements. :: + + (field+) + +:Attributes: + The ``field_list`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``field_list``. The `%structure.model;`_ parameter entity + indirectly includes ``field_list``. + + +Examples +-------- + +reStructuredText_ source:: + + :Author: Me + :Version: 1 + :Date: 2001-08-11 + :Parameter i: integer + +Pseudo-XML_ fragment from simple parsing:: + + <field_list> + <field> + <field_name> + Author + <field_body> + <paragraph> + Me + <field> + <field_name> + Version + <field_body> + <paragraph> + 1 + <field> + <field_name> + Date + <field_body> + <paragraph> + 2001-08-11 + <field> + <field_name> + Parameter i + <field_body> + <paragraph> + integer + +.. 
_directive options: rst/restructuredtext.html#directive-options + + +``field_name`` +============== + +The ``field_name`` element contains text; it is analogous to a +database field's name. + + +Details +------- + +:Category: + `Body Subelements`_ (simple) + +:Analogues: + ``field_name`` has no direct analogues in common DTDs. + +:Processing: + See field_list_. + +:Parents: + Only the field_ element contains ``field_name``. + +:Children: + ``field_name`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``field_name`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the field_list_ and docinfo_ elements. + + +``figure`` +========== + +`To be completed`_. + + +``footer`` +========== + +The ``footer`` element is a container element whose contents are meant +to appear at the bottom of a web page, or repeated at the bottom of +every printed page. The ``footer`` element may contain processing +information (datestamp, a link to Docutils_, etc.) as well as custom +content. + + +Details +------- + +:Category: + `Decorative Elements`_ + +:Analogues: + ``footer`` is analogous to the HTML5 "footer" element. + There are no direct analogies to ``footer`` in HTML4 or DocBook. + Equivalents are typically constructed from primitives and/or + generated by the processing system. + +:Parents: + Only the decoration_ element contains ``footer``. + +:Children: + ``footer`` elements may contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``footer`` element contains only the `common attributes`_. + + +Examples +-------- + +reStructuredText_ source:: + + A paragraph. + +Complete pseudo-XML_ result after parsing and applying transforms, +assuming that the datestamp command-line option or configuration +setting has been supplied:: + + <document> + <decoration> + <footer> + <paragraph> + Generated on: 2002-08-20. 
+ <paragraph> + A paragraph. + + +``footnote`` +============ + +The ``footnote`` element is used for labeled notes_ that provide +additional context to a passage of text (*footnotes* or *endnotes*). +The corresponding footnote mark in running text is set by the +`footnote_reference`_ element. + +.. _notes: https://en.wikipedia.org/wiki/Note_(typography) + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``footnote`` has no direct analogues in DocBook or HTML. + + The `DocBook "footnote"`_ element combines features of ``footnote`` + and footnote_reference_. + + The `ARIA role "note"`__ may be used to mark a (conforming__) + `HTML emulation`__ as "a section whose content is parenthetic or + ancillary to the main content of the resource". + + Depending on the note's position, the `epub:type`__ *footnote* or + *endnote* and the DPub ARIA role `"doc-footnote"`__ or + `"doc-endnote"`__ may be applicable. + + .. _DocBook "footnote": https://tdg.docbook.org/tdg/5.1/footnote.html + __ https://www.w3.org/TR/wai-aria-1.1/#note + __ https://www.w3.org/TR/html-aria/#docconformance + __ https://www.w3.org/TR/html51/ + common-idioms-without-dedicated-elements.html#footnotes + __ https://idpf.github.io/epub-vocabs/structure/#notes + __ https://www.w3.org/TR/dpub-aria-1.0/#doc-footnote + __ https://www.w3.org/TR/dpub-aria-1.0/#doc-endnote + +:Processing: + A ``footnote`` element should be set off from the rest of the + document, e.g. with a border or using a smaller font size. + + Footnotes may "float" to the bottom or margin of a page or a + dedicated section. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``footnote``. + +:Children: + ``footnote`` elements begin with an optional label_ + and contain `body elements`_. + + .. 
parsed-literal:: + + (label?, (`%body.elements;`_)+) + +:Attributes: + The ``footnote`` element contains the `common attributes`_ + plus auto_ and backrefs_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``footnote``. The `%structure.model;`_ parameter entity indirectly + includes ``footnote``. + + +Examples +-------- + +reStructuredText_ uses `explicit markup blocks`_ for footnotes:: + + .. [1] This is a footnote. + +Pseudo-XML_ fragment from simple parsing:: + + <footnote ids="id1" names="1"> + <label> + 1 + <paragraph> + This is a footnote. + +.. _explicit markup blocks: rst/restructuredtext.html#explicit-markup-blocks + + +``footnote_reference`` +====================== + +The ``footnote_reference`` element is an inline element representing a +cross reference to a footnote_ (a footnote mark). + + +Details +------- + +:Category: + `Inline Elements`_ + +:Analogues: + The ``footnote_reference`` element resembles the `DocBook + "footnoteref"`_ element or the LaTeX ``\footnotemark`` command. + + There is no equivalent in HTML. The ``<a>`` element can be used + to provide a link to the corresponding footnote. + + .. _DocBook "footnoteref": https://tdg.docbook.org/tdg/5.1/footnoteref.html + +:Processing: + A ``footnote_reference`` should generate a mark matching the label_ + of the referenced footnote. The mark is typically formatted as + superscript or enclosed in square brackets. + +:Parents: + All elements employing the `%inline.elements;`_ parameter entities in + their content models may contain ``footnote_reference``. + +:Children: + ``footnote_reference`` elements may contain text data. :: + + (#PCDATA) + +:Attributes: + The ``footnote_reference`` element contains the `common attributes`_ + plus auto_, refid_, and refname_. + + +Examples +-------- + +reStructuredText source fragment:: + + [#]_ is an auto-numbered footnote reference. + + .. [#] Auto-numbered footnote 1.
+ +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + <footnote_reference auto="1" ids="id1"> + is an auto-numbered footnote reference. + <footnote auto="1" ids="id3"> + <paragraph> + Auto-numbered footnote 1. + +The ``references.Footnotes`` Docutils transform_ resolves this to:: + + <paragraph> + <footnote_reference auto="1" ids="id1" refid="id2"> + 1 + is an auto-numbered footnote reference. + <footnote auto="1" backrefs="id1" ids="id2" names="1"> + <label> + 1 + <paragraph> + Auto-numbered footnote 1. + +.. _transform: api/transforms.html + + +``generated`` +============= + +Docutils wraps ``generated`` elements around text that is inserted +(generated) by Docutils; i.e., text that was not in the document, like +section numbers inserted by the "sectnum" directive. + +`To be completed`_. + + +``header`` +========== + +The ``header`` element is a container element whose contents are meant +to appear at the top of a web page, or at the top of every printed +page. + + +Details +------- + +:Category: + `Decorative Elements`_ + +:Analogues: + ``header`` is analogous to the HTML5 "header" element. + There are no direct analogies to ``header`` in HTML4 or DocBook. + Equivalents are typically constructed from primitives and/or + generated by the processing system. + +:Parents: + Only the decoration_ element contains ``header``. + +:Children: + ``header`` elements may contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``header`` element contains only the `common attributes`_. + + +Examples +-------- + +reStructuredText source fragment:: + + .. header:: This space for rent. + +Pseudo-XML_ fragment from simple parsing:: + + <document> + <decoration> + <header> + <paragraph> + This space for rent. + + +``hint`` +======== + +The ``hint`` element is an admonition, a distinctive and +self-contained notice.
Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, +danger_, error_, important_, note_, tip_, warning_, and the generic +admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``hint`` has no direct analogues in common DTDs. It can be + emulated with primitives and type effects. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Hint" (or similar). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``hint``. + +:Children: + ``hint`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``hint`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``hint``. The `%structure.model;`_ parameter entity indirectly + includes ``hint``. + + +Examples +-------- + +reStructuredText source:: + + .. Hint:: It's bigger than a bread box. + +Pseudo-XML_ fragment from simple parsing:: + + <hint> + <paragraph> + It's bigger than a bread box. + + +``image`` +========= + +:Attributes: + The ``image`` element contains the `common attributes`_ + plus uri, align_, alt, height_, width_, and scale_. + +`To be completed`_. + + +``important`` +============= + +The ``important`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, +danger_, error_, hint_, note_, tip_, warning_, and the generic +admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``important`` is analogous to the `DocBook "important"`_ element. + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Important" (or similar). + +.. 
_DocBook "important": https://tdg.docbook.org/tdg/5.1/important.html + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``important``. + +:Children: + ``important`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``important`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``important``. The `%structure.model;`_ parameter entity + indirectly includes ``important``. + + +Examples +-------- + +reStructuredText source:: + + .. Important:: + + * Wash behind your ears. + * Clean up your room. + * Back up your data. + * Call your mother. + +Pseudo-XML_ fragment from simple parsing:: + + <important> + <bullet_list> + <list_item> + <paragraph> + Wash behind your ears. + <list_item> + <paragraph> + Clean up your room. + <list_item> + <paragraph> + Back up your data. + <list_item> + <paragraph> + Call your mother. + + +``inline`` +========== + +The ``inline`` element is a generic inline container. + +Details +------- + +:Category: + `Inline Elements`_ + +:Analogues: + ``inline`` is analogous to the HTML "span" element. + +:Processing: + Writers typically pass the classes_ attribute to the output document + and leave styling to the backend or a custom stylesheet_. They may + also process the classes_ attribute and convert the ``inline`` + element to a specific element or render the content distinctly + for specific class values. Moreover, writers may ignore the classes + attribute and render the content as ordinary text. + +:Parents: + All elements employing the `%inline.elements;`_ parameter entities in + their content models may contain ``inline``. + +:Children: + ``inline`` elements may contain text data plus `inline elements`_. + + .. 
parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``inline`` element contains the `common attributes`_. + + +Examples +-------- + +`Custom interpreted text roles`_ create ``inline`` elements (unless they +are based on a `standard role`_). + +reStructuredText source fragment:: + + .. role:: custom + + An example of using :custom:`interpreted text` + +Pseudo-XML_ fragment from simple parsing:: + + + <paragraph> + An example of using + <inline classes="custom"> + interpreted text + +.. _stylesheet: ../user/config.html#stylesheet +.. _custom interpreted text roles: + rst/directives.html#custom-interpreted-text-roles +.. _standard role: rst/roles.html + + +``label`` +========= + +`To be completed`_. + + +``legend`` +========== + +`To be completed`_. + + +``line`` +======== + +The ``line`` element contains a single line of text, part of a +`line_block`_. + + +Details +------- + +:Category: + `Body Subelements`_ (simple) + +:Parents: + Only the `line_block`_ element contains ``line``. + +:Children: + ``line`` elements may contain text data plus `inline elements`_. + +:Analogues: + ``line`` has no direct analogues in common DTDs. It can be + emulated with primitives or type effects. + +:Processing: + See `line_block`_. + +:Parents: + Only the `line_block`_ element contains ``line``. + +:Children: + ``line`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``line`` element contains the `common attributes`_. + + +Examples +-------- + +See `line_block`_. + + +``line_block`` +============== + +The ``line_block`` element contains a sequence of lines and nested +line blocks. Line breaks (implied between elements) and leading +whitespace (indicated by nesting) are significant and must be +preserved. ``line_block`` elements are commonly used for verse and +addresses.
See `literal_block`_ for an alternative useful for program +listings and interactive computer sessions. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``line_block`` is analogous to the DocBook "literallayout" element + and to the HTML "pre" element (with modifications to typeface + styles). + +:Processing: + Unlike ``literal_block``, ``line_block`` elements are typically + rendered in an ordinary text typeface. It is crucial that leading + whitespace and line breaks are preserved in the rendered form. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``line_block``. + +:Children: + ``line_block`` elements may contain line_ elements and nested + line_block_ elements. :: + + (line | line_block)+ + +:Attributes: + The ``line_block`` element contains the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``line_block``. The `%structure.model;`_ parameter entity + indirectly includes ``line_block``. + + +Examples +-------- + +Example source:: + + Take it away, Eric the Orchestra Leader! + + | A one, two, a one two three four + | + | Half a bee, philosophically, + | must, *ipso facto*, half not be. + | But half the bee has got to be, + | *vis a vis* its entity. D'you see? + | + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + | + | Singing... + +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + Take it away, Eric the Orchestra Leader! + <line_block> + <line> + A one, two, a one two three four + <line> + <line> + Half a bee, philosophically, + <line_block> + <line> + must, + <emphasis> + ipso facto + , half not be. + <line> + But half the bee has got to be, + <line_block> + <line> + <emphasis> + vis a vis + its entity. D'you see? 
+ <line> + <line> + But can a bee be said to be + <line_block> + <line> + or not to be an entire bee, + <line_block> + <line> + when half the bee is not a bee, + <line_block> + <line> + due to some ancient injury? + <line> + <line> + Singing... + + +``list_item`` +============= + +The ``list_item`` element is a container for the elements of a list +item. + + +Details +------- + +:Category: + `Body Subelements`_ (compound) + +:Analogues: + ``list_item`` is analogous to the HTML "li" element and to the + DocBook "listitem" element. + +:Processing: + See bullet_list_ or enumerated_list_. + +:Parents: + The bullet_list_ and enumerated_list_ elements contain + ``list_item``. + +:Children: + ``list_item`` elements may contain `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)* + +:Attributes: + The ``list_item`` element contains only the `common attributes`_. + + +Examples +-------- + +reStructuredText_ source:: + + 1. Outer list, item 1. + + * Inner list, item 1. + * Inner list, item 2. + + 2. Outer list, item 2. + +Pseudo-XML_ fragment from simple parsing:: + + <enumerated_list enumtype="arabic" prefix="" suffix="."> + <list_item> + <paragraph> + Outer list, item 1. + <bullet_list bullet="*"> + <list_item> + <paragraph> + Inner list, item 1. + <list_item> + <paragraph> + Inner list, item 2. + <list_item> + <paragraph> + Outer list, item 2. + +See bullet_list_ or enumerated_list_ for further examples. + + +``literal`` +=========== + +`To be completed`_. + + +``literal_block`` +================= + +The ``literal_block`` element contains a block of text where line +breaks and whitespace are significant and must be preserved. +``literal_block`` elements are commonly used for program listings and +interactive computer sessions. See `line_block`_ for an alternative +useful for verse and addresses. 
+ + +Details +------- + +:Category: + `Simple Body Elements`_ + +:Analogues: + ``literal_block`` is analogous to the HTML "pre" element and to + the DocBook "programlisting" and "screen" elements. + +:Processing: + ``literal_block`` elements are typically rendered in a monospaced + typeface. It is crucial that all whitespace and line breaks are + preserved in the rendered form. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``literal_block``. + +:Children: + ``literal_block`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``literal_block`` element contains the `common attributes`_ + plus `xml:space`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``literal_block``. The `%structure.model;`_ parameter entity + indirectly includes ``literal_block``. + + +Examples +-------- + +reStructuredText source:: + + Here is a literal block:: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + Here is a literal block: + <literal_block xml:space="preserve"> + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + +``math`` +======== + +The ``math`` element contains text in `LaTeX math format` [#latex-math]_ +that is typeset as mathematical notation (inline formula). + +If the output format does not support math typesetting, the content is +inserted verbatim. + +Details +------- + +:Category: + `Inline Elements`_ + +:Analogues: + ``math`` is analogous to a MathML "math" element or + the LaTeX (``$ math $``) mode. + +:Processing: + Rendered as mathematical notation. 
+ +:Parents: + All elements employing the `%inline.elements;`_ parameter entities in + their content models may contain ``math``. + +:Children: + ``math`` elements may contain text data. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``math`` element contains the `common attributes`_. + +.. [#latex-math] For details of the supported mathematical language, see + the `"math" directive`_. + +.. _"math" directive: rst/directives.html#math + + +``math_block`` +============== + +The ``math_block`` element contains a block of text in `LaTeX math +format` [#latex-math]_ that is typeset as mathematical notation +(display formula). The ``math_block`` element is generated during +the initial parse from a `"math" directive`_. + +If the output format does not support math typesetting, the content is +inserted verbatim. + +Details +------- + +:Category: + `Simple Body Elements`_ + +:Analogues: + ``math_block`` is analogous to a LaTeX "equation*" environment or + a MathML "math" element displayed as a block-level element. + +:Processing: + Rendered in a block as mathematical notation, typically centered or with + indentation. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``math_block``. + +:Children: + ``math_block`` elements may contain text data. :: + + (#PCDATA) + +:Attributes: + The ``math_block`` element contains the `common attributes`_. + +``meta`` +======== + +The ``meta`` element is a container for "hidden" document +bibliographic data, or meta-data (data about the document). +It corresponds to HTML META tags. + +See also the docinfo_ element for displayed meta-data. +The document's `title attribute`_ stores the metadata document title. + + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + ``meta`` is analogous to the HTML "meta" element + or the file properties in ODT or PDF documents.
+ +:Processing: + The ``meta`` element is stored as metadata if the export format + supports this. It is typically invisible and may be omitted from + the processed output. + + Meta-data may also be extracted from docinfo_ children + or the document_ attributes (title). + +:Parents: + Only the document_ element contains ``meta``. + +:Children: + None. + + +Example +------- + +The `"meta" directive`_ is used to create a ``meta`` element. +reStructuredText_ source:: + + .. meta:: + :description lang=en: An amusing story + :description lang=fr: Un histoire amusant + +Pseudo-XML_ fragment from simple parsing:: + + <meta content="An amusing story" lang="en" name="description"> + <meta content="Un histoire amusant" lang="fr" name="description"> + +.. _"meta" directive: rst/directives.html#meta + + +``note`` +======== + +The ``note`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, +danger_, error_, hint_, important_, tip_, warning_, and the generic +admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``note`` is analogous to the `DocBook "note"`_ element. + + .. _DocBook "note": https://tdg.docbook.org/tdg/5.1/note.html + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Note" (or similar). + + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``note``. + +:Children: + ``note`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``note`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``note``. The `%structure.model;`_ parameter entity indirectly + includes ``note``. + + +Examples +-------- + +reStructuredText source:: + + .. 
Note:: Admonitions can be handy to break up a + long boring technical document. + +Pseudo-XML_ fragment from simple parsing:: + + <note> + <paragraph> + Admonitions can be handy to break up a + long boring technical document. + +``option`` +========== + +The ``option`` element groups an option string together with zero or +more option argument placeholders. Note that reStructuredText_ +currently supports only one argument per option. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``option`` has no direct analogues in common DTDs. + +:Processing: + See option_list_. + +:Parents: + Only the option_group_ element contains ``option``. + +:Children: + Each ``option`` element contains one option_string_ and zero or + more option_argument_ elements. :: + + (option_string, option_argument*) + +:Attributes: + The ``option`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the option_list_ element. + + +``option_argument`` +=================== + +The ``option_argument`` element contains placeholder text for option +arguments. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``option_argument`` has no direct analogues in common DTDs. + +:Processing: + The value of the "delimiter" attribute is prefixed to the + ``option_argument``, separating it from its option_string_ or a + preceding ``option_argument``. The ``option_argument`` text is + typically rendered in a monospaced typeface, possibly italicized + or otherwise altered to indicate its placeholder nature. + +:Parents: + Only the option_ element contains ``option_argument``. + +:Children: + ``option_argument`` elements contain text data only. :: + + (#PCDATA) + +:Attributes: + The ``option_argument`` element contains the `common attributes`_ + plus delimiter_. 
+ + ``delimiter`` contains the text preceding the ``option_argument``: + either the text separating it from the option_string_ (typically + either "=" or " ") or the text between option arguments (typically + either "," or " "). + + +Examples +-------- + +See the examples for the option_list_ element. + + +``option_group`` +================ + +The ``option_group`` element groups together one or more option_ +elements, all synonyms. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``option_group`` has no direct analogues in common DTDs. + +:Processing: + Typically option_ elements within an ``option_group`` are joined + together in a comma-separated list. + +:Parents: + Only the option_list_item_ element contains ``option_group``. + +:Children: + ``option_group`` elements contain one or more option_ elements. :: + + (option+) + +:Attributes: + The ``option_group`` element contains only the `common attributes`_. + +Examples +-------- + +See the examples for the option_list_ element. + + +``option_list`` +=============== + +Each ``option_list`` element contains a two-column list of +command-line options and descriptions, documenting a program's +options. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``option_list`` has no direct analogues in common DTDs. It can be + emulated with primitives such as tables. + +:Processing: + An ``option_list`` is typically rendered as a two-column list, + where the first column contains option strings and arguments, and + the second column contains descriptions. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``option_list``. + +:Children: + ``option_list`` elements contain one or more option_list_item_ + elements. :: + + (option_list_item+) + +:Attributes: + The ``option_list`` element contains only the `common attributes`_. 
+ +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``option_list``. The `%structure.model;`_ parameter entity + indirectly includes ``option_list``. + + +Examples +-------- + +reStructuredText_ source:: + + -a command-line option "a" + -1 file, --one=file, --two file + Multiple options with arguments. + +Pseudo-XML_ fragment from simple parsing:: + + <option_list> + <option_list_item> + <option_group> + <option> + <option_string> + -a + <description> + <paragraph> + command-line option "a" + <option_list_item> + <option_group> + <option> + <option_string> + -1 + <option_argument delimiter=" "> + file + <option> + <option_string> + --one + <option_argument delimiter="="> + file + <option> + <option_string> + --two + <option_argument delimiter=" "> + file + <description> + <paragraph> + Multiple options with arguments. + + +``option_list_item`` +==================== + +The ``option_list_item`` element is a container for a pair of +option_group_ and description_ elements. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``option_list_item`` has no direct analogues in common DTDs. + +:Processing: + See option_list_. + +:Parents: + Only the option_list_ element contains ``option_list_item``. + +:Children: + Each ``option_list_item`` element contains one option_group_ and + one description_ element. :: + + (option_group, description) + +:Attributes: + The ``option_list_item`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the option_list_ element. + + +``option_string`` +================= + +The ``option_string`` element contains the text of a command-line +option. + + +Details +------- + +:Category: + `Body Subelements`_ + +:Analogues: + ``option_string`` has no direct analogues in common DTDs. + +:Processing: + The ``option_string`` text is typically rendered in a monospaced + typeface. + +:Parents: + Only the option_ element contains ``option_string``. 
+ +:Children: + ``option_string`` elements contain text data only. :: + + (#PCDATA) + +:Attributes: + The ``option_string`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the option_list_ element. + + +``organization`` +================ + +The ``organization`` element contains the name of document author's +organization, or the organization responsible for the document. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``organization`` is analogous to the DocBook "orgname", + "corpname", or "publishername" elements. + +:Processing: + See docinfo_. + +:Parents: + Only the docinfo_ element contains ``organization``. + +:Children: + ``organization`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``organization`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``organization``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Organization: Humankind + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <organization> + Humankind + +See docinfo_ for a more complete example, including processing +context. + + +``paragraph`` +============= + +The ``paragraph`` element contains the text and inline elements of a +single paragraph, a fundamental building block of documents. + + +Details +------- + +:Category: + `Simple Body Elements`_ + +:Analogues: + ``paragraph`` is analogous to the HTML "p" element and to the + DocBook "para" elements. + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``paragraph``. 
+ +:Children: + ``paragraph`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``paragraph`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``paragraph``. The `%structure.model;`_ parameter entity + indirectly includes ``paragraph``. + + +Examples +-------- + +reStructuredText_ source:: + + A paragraph. + +Pseudo-XML_ fragment from simple parsing:: + + <paragraph> + A paragraph. + + +``pending`` +=========== + +`To be completed`_. + + +``problematic`` +=============== + +`To be completed`_. + + +``raw`` +======= + +`To be completed`_. + + +``reference`` +============= + +`To be completed`_. + + +``revision`` +============ + +The ``revision`` element contains the revision number of the document. +It can be used alone or in conjunction with version_. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``revision`` is analogous to but simpler than the DocBook + "revision" element. It closely matches the DocBook "revnumber" + element, but in a simpler context. + +:Processing: + Often used with the RCS/CVS keyword "Revision". See docinfo_. + +:Parents: + Only the docinfo_ element contains ``revision``. + +:Children: + ``revision`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``revision`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``revision``. 
+ + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Version: 1 + :Revision: b + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <version> + 1 + <revision> + b + +See docinfo_ for a more complete example, including processing +context. + + +``row`` +======= + +`To be completed`_. + + +``rubric`` +========== + + rubric n. 1. a title, heading, or the like, in a manuscript, + book, statute, etc., written or printed in red or otherwise + distinguished from the rest of the text. ... + + -- Random House Webster's College Dictionary, 1991 + +A rubric is like an informal heading that doesn't correspond to the +document's structure. + +`To be completed`_. + + +``section`` +=========== + +The ``section`` element is the main unit of hierarchy for Docutils +documents. Docutils ``section`` elements are a recursive structure; a +``section`` may contain other ``section`` elements, without limit. +Paragraphs and other body elements may occur before a ``section``, but +not after it. + + +Details +------- + +:Category: + `Structural Elements`_ + +:Analogues: + ``section`` is analogous to the recursive "section" elements in + DocBook and HTML5. + +:Parents: + The following elements may contain ``section``: document_, + section_ + +:Children: + ``section`` elements begin with a title_, and may contain `body + elements`_ as well as transition_, topic_, and sidebar_ elements. + + .. parsed-literal:: + + (title_, `%structure.model;`_) + + See the `%structure.model;`_ parameter entity for details of the body + of a ``section``. + +:Attributes: + The ``section`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%section.elements;`_ parameter entity directly includes + ``section``. The `%structure.model;`_ parameter entity indirectly + includes ``section``. 
+ + +Examples +-------- + +reStructuredText_ source:: + + Title 1 + ======= + Paragraph 1. + + Title 2 + ------- + Paragraph 2. + + Title 3 + ======= + Paragraph 3. + + Title 4 + ------- + Paragraph 4. + +Complete pseudo-XML_ result after parsing:: + + <document> + <section ids="title-1" names="title 1"> + <title> + Title 1 + <paragraph> + Paragraph 1. + <section ids="title-2" names="title 2"> + <title> + Title 2 + <paragraph> + Paragraph 2. + <section ids="title-3" names="title 3"> + <title> + Title 3 + <paragraph> + Paragraph 3. + <section ids="title-4" names="title 4"> + <title> + Title 4 + <paragraph> + Paragraph 4. + + +``sidebar`` +=========== + +Sidebars are like miniature, parallel documents that occur inside +other documents, providing related or reference material. A +``sidebar`` is typically offset by a border and "floats" to the side +of the page; the document's main text may flow around it. Sidebars +can also be likened to super-footnotes; their content is outside of +the flow of the document's main text. + +The ``sidebar`` element is a nonrecursive section_-like construct +which may occur at the top level of a section_ wherever a body element +(list, table, etc.) is allowed. In other words, ``sidebar`` elements +cannot nest inside body elements, so you can't have a ``sidebar`` +inside a ``table`` or a ``list``, or inside another ``sidebar`` (or +topic_). + + +Details +------- + +:Category: + `Structural Elements`_ + +:Analogues: + ``sidebar`` is analogous to the DocBook "sidebar" element. + +:Processing: + A ``sidebar`` element should be set off from the rest of the + document somehow, typically with a border. Sidebars typically + "float" to the side of the page and the document's main text flows + around them. + +:Parents: + The following elements may contain ``sidebar``: document_, + section_ + +:Children: + ``sidebar`` elements begin with optional title_ and subtitle_ + and contain `body elements`_ and topic_ elements. + + .. 
parsed-literal:: + + (title, subtitle?, + (`%body.elements;`_ | topic)+) + +:Attributes: + The ``sidebar`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%structure.model;`_ parameter entity directly includes + ``sidebar``. + + +Examples +-------- + +The `"sidebar" directive`_ is used to create a ``sidebar`` element. +reStructuredText_ source:: + + .. sidebar:: Optional Title + :subtitle: If Desired + + Body. + +Pseudo-XML_ fragment from simple parsing:: + + <sidebar> + <title> + Optional Title + <subtitle> + If Desired + <paragraph> + Body. + +.. _"sidebar" directive: rst/directives.html#sidebar + + +``status`` +========== + +The ``status`` element contains a status statement for the document, +such as "Draft", "Final", "Work In Progress", etc. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``status`` is analogous to the DocBook "status" element. + +:Processing: + See docinfo_. + +:Parents: + Only the docinfo_ element contains ``status``. + +:Children: + ``status`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``status`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``status``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Status: Work In Progress + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <status> + Work In Progress + +See docinfo_ for a more complete example, including processing +context. + + +``strong`` +========== + +`To be completed`_. + + +``subscript`` +============= + +`To be completed`_. + + +``substitution_definition`` +=========================== + +`To be completed`_. 
+ + +``substitution_reference`` +========================== + +`To be completed`_. + + +``subtitle`` +============ + +The ``subtitle`` element stores the subtitle of a document_. + + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + ``subtitle`` is analogous to HTML header elements ("h2" etc.) and + to the DocBook "subtitle" element. + +:Processing: + A document's subtitle is usually rendered smaller than its title_. + +:Parents: + The document_ and sidebar_ elements may contain ``subtitle``. + +:Children: + ``subtitle`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``subtitle`` element contains only the `common attributes`_. + + +Examples +-------- + +reStructuredText_ source:: + + ======= + Title + ======= + ---------- + Subtitle + ---------- + + A paragraph. + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="title" names="title"> + <title> + Title + <subtitle ids="subtitle" names="subtitle"> + Subtitle + <paragraph> + A paragraph. + +Note how two section levels have collapsed, promoting their titles to +become the document's title and subtitle. Since there is only one +structural element (document), the subsection's ``ids`` and ``names`` +attributes are stored in the ``subtitle`` element. + + +``superscript`` +=============== + +`To be completed`_. + + +``system_message`` +================== + +`To be completed`_. + + +``table`` +========= + +The ``table`` element identifies a data arrangement with rows and columns. + +Docutils tables are based on the `Exchange subset of the CALS-table +model` [exchange-table-model]_. [#]_ + +.. [#] The interpretation of column widths in colspec_ differs from the + specification. + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``table`` is analogous to the HTML "table" element. + +:Processing: + Content is rendered in rows and columns. 
+ +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``table``. + +:Children: + ``table`` elements begin with an optional title_ (caption) and may + contain one or more `tgroup`_ elements. :: + + (title?, tgroup+) + +:Attributes: + The ``table`` element contains the attributes frame, colsep, rowsep, + and pgwide defined in the exchange-table-model_, the + `common attributes`_, align_, and width_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``table``. The `%structure.model;`_ parameter entity + indirectly includes ``table``. + +Examples +-------- + +In reStructuredText, tables can specified via the +table__, csv-table_, or list-table_ directives or directly as +`grid table`_ or `simple table`_, e.g. :: + + ======== ==== + bread £2 + butter £30 + ======== ==== + +Pseudo-XML_ fragment from simple parsing:: + + <table> + <tgroup cols="2"> + <colspec colwidth="8"> + <colspec colwidth="4"> + <tbody> + <row> + <entry> + <paragraph> + bread + <entry> + <paragraph> + £2 + <row> + <entry> + <paragraph> + butter + <entry> + <paragraph> + £30 + +__ rst/directives.html#table +.. _csv-table: rst/directives.html#csv-table +.. _list-table: rst/directives.html#list-table +.. _grid table: rst/restructuredtext.html#grid-tables +.. _simple table: rst/restructuredtext.html#simple-tables + +.. [exchange-table-model] `XML Exchange Table Model DTD`, OASIS Technical + Memorandum 9901:1999, http://www.oasis-open.org/html/tm9901.html. + +``target`` +========== + +`To be completed`_. + + +``tbody`` +========= + +`To be completed`_. + + +``term`` +======== + +The ``term`` element contains a word or phrase being defined in a +definition_list_. + + +Details +------- + +:Category: + `Body Subelements`_ (simple) + +:Analogues: + ``term`` is analogous to the HTML "dt" element and to the DocBook + "term" element. + +:Processing: + See definition_list_item_. 
+ +:Parents: + Only the definition_list_item_ element contains ``term``. + +:Children: + ``term`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``term`` element contains only the `common attributes`_. + + +Examples +-------- + +See the examples for the definition_list_, definition_list_item_, and +classifier_ elements. + + +``tgroup`` +========== + +See [exchange-table-model]_. +.. parsed-literal:: + + (colspec_\*, thead_\?, tbody_) + + +`To be completed`_. + + +``thead`` +========= + +`To be completed`_. + + +``tip`` +======= + +The ``tip`` element is an admonition, a distinctive and self-contained +notice. Also see the other admonition elements Docutils offers (in +alphabetical order): attention_, caution_, danger_, error_, hint_, +important_, note_, warning_, and the generic admonition_. + + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``tip`` is analogous to the `DocBook "tip"`_ element. + + .. _DocBook "tip": https://tdg.docbook.org/tdg/5.1/tip.html + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Tip" (or similar). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``tip``. + +:Children: + ``tip`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``tip`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes ``tip``. + The `%structure.model;`_ parameter entity indirectly includes + ``tip``. + + +Examples +-------- + +reStructuredText source:: + + .. Tip:: 15% if the service is good. + +Pseudo-XML_ fragment from simple parsing:: + + <tip> + <paragraph> + 15% if the service is good. + + +.. 
_title: + +``title`` +========= + +The ``title`` element stores the title of a document_, section_, +sidebar_, table_, topic_, or generic admonition_. + + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + ``title`` is analogous to HTML "title" and header ("h1" etc.) + elements, and to the DocBook "title" element. + +:Parents: + The following elements may contain ``title``: admonition_, document_, + section_, sidebar_, table_, topic_. + +:Children: + ``title`` elements may contain text data plus `inline elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``title`` element contains the `common attributes`_ + plus refid_ and auto_. + + ``refid`` is used as a backlink to a table of contents entry. + + ``auto`` is used to indicate (with value "1") that the ``title`` + has been numbered automatically. + + +Examples +-------- + +reStructuredText_ source:: + + A Title + ======= + + A paragraph. + +Pseudo-XML_ fragment from simple parsing:: + + <section ids="a-title" names="a title"> + <title> + A Title + <paragraph> + A paragraph. + + +``title_reference`` +=================== + +`To be completed`_. + + +``topic`` +========= + +The ``topic`` element is a nonrecursive section_-like construct which +may occur at the top level of a section_ wherever a body element +(list, table, etc.) is allowed. In other words, ``topic`` elements +cannot nest inside body elements, so you can't have a ``topic`` inside +a ``table`` or a ``list``, or inside another ``topic``. + + +Details +------- + +:Category: + `Structural Elements`_ + +:Analogues: + ``topic`` is analogous to the DocBook "simplesect" element. + +:Processing: + A ``topic`` element should be set off from the rest of the + document somehow, such as with indentation or a border. + +:Parents: + The following elements may contain ``topic``: document_, section_, + sidebar_ + +:Children: + ``topic`` elements begin with a title_ and may contain `body + elements`_. + + .. 
parsed-literal:: + + (title?, (`%body.elements;`_)+) + +:Attributes: + The ``topic`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%structure.model;`_ parameter entity directly includes + ``topic``. + + +Examples +-------- + +The `"topic" directive`_ is used to create a ``topic`` element. +reStructuredText_ source:: + + .. topic:: Title + + Body. + +Pseudo-XML_ fragment from simple parsing:: + + <topic> + <title> + Title + <paragraph> + Body. + +.. _"topic" directive: rst/directives.html#topic + + +``transition`` +============== + +The ``transition`` element is commonly seen in novels and short +fiction, as a gap spanning one or more lines, with or without a type +ornament such as a row of asterisks. Transitions separate body +elements and sections, dividing a section into untitled divisions. A +transition may not begin or end a section [#]_ or document, nor may +two transitions be immediately adjacent. + +See `Doctree Representation of Transitions`__ in `A Record of +reStructuredText Syntax Alternatives`__. + +.. [#] In reStructuredText markup, a transition may appear to fall at + the end of a section immediately before another section. A + transform recognizes this case and moves the transition so it + separates the sections. + +__ ../dev/rst/alternatives.html#doctree-representation-of-transitions +__ ../dev/rst/alternatives.html + + +Details +------- + +:Category: + `Structural Subelements`_ + +:Analogues: + ``transition`` is analogous to the HTML "hr" element. + +:Processing: + The ``transition`` element is typically rendered as vertical + whitespace (more than that separating paragraphs), with or without + a horizontal line or row of asterisks. In novels, transitions are + often represented as a row of three well-spaced asterisks with + vertical space above and below. + +:Parents: + The following elements may contain ``transition``: document_, + section_ + +:Children: + The ``transition`` element has no content. 
+ +:Attributes: + The ``transition`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%structure.model;`_ parameter entity directly includes + ``transition``. + + +Examples +-------- + +reStructuredText_ source:: + + Paragraph 1. + + -------- + + Paragraph 2. + +Complete pseudo-XML_ result after parsing:: + + <document> + <paragraph> + Paragraph 1. + <transition> + <paragraph> + Paragraph 2. + + +``version`` +=========== + +The ``version`` element contains the version number of the document. +It can be used alone or in conjunction with revision_. + + +Details +------- + +:Category: + `Bibliographic Elements`_ + +:Analogues: + ``version`` may be considered analogous to the DocBook "revision", + "revnumber", or "biblioid" elements. + +:Processing: + Sometimes used with the RCS/CVS keyword "Revision". See docinfo_ + and revision_. + +:Parents: + Only the docinfo_ element contains ``version``. + +:Children: + ``version`` elements may contain text data plus `inline + elements`_. + + .. parsed-literal:: + + `%text.model;`_ + +:Attributes: + The ``version`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%bibliographic.elements;`_ parameter entity directly includes + ``version``. + + +Examples +-------- + +reStructuredText_ source:: + + Document Title + ============== + + :Version: 1.1 + +Complete pseudo-XML_ result after parsing and applying transforms:: + + <document ids="document-title" names="document title"> + <title> + Document Title + <docinfo> + <version> + 1.1 + +See docinfo_ for a more complete example, including processing +context. + + +``warning`` +=========== + +The ``warning`` element is an admonition, a distinctive and +self-contained notice. Also see the other admonition elements +Docutils offers (in alphabetical order): attention_, caution_, +danger_, error_, hint_, important_, note_, tip_. 
+ + +Details +------- + +:Category: + `Compound Body Elements`_ + +:Analogues: + ``warning`` is analogous to the `DocBook "warning"`_ element. + + .. _DocBook "warning": https://tdg.docbook.org/tdg/5.1/warning.html + +:Processing: + Rendered distinctly (inset and/or in a box, etc.), with the + generated title "Warning" (or similar). + +:Parents: + All elements employing the `%body.elements;`_ or + `%structure.model;`_ parameter entities in their content models + may contain ``warning``. + +:Children: + ``warning`` elements contain one or more `body elements`_. + + .. parsed-literal:: + + (`%body.elements;`_)+ + +:Attributes: + The ``warning`` element contains only the `common attributes`_. + +:Parameter Entities: + The `%body.elements;`_ parameter entity directly includes + ``warning``. The `%structure.model;`_ parameter entity indirectly + includes ``warning``. + + +Examples +-------- + +reStructuredText source:: + + .. WARNING:: Reader discretion is strongly advised. + +Pseudo-XML_ fragment from simple parsing:: + + <warning> + <paragraph> + Reader discretion is strongly advised. + + +.. _attribute type: + +--------------- +Attribute types +--------------- + +.. contents:: :local: + :depth: 1 + +Standard attribute types +======================== + +Attribute types defined in the `attribute types`__ section of the +`XML 1.0 specification`_: + +_`CDATA` + Character data. CDATA attributes may contain arbitrary text. + +_`NMTOKEN` + A "name token". One or more of letters, digits, ".", "-", and + "_". + +_`NMTOKENS` + One or more space-separated NMTOKEN values. + +_`EnumeratedType` + The attribute value may be one of a specified list of values. + +Docutils uses `custom attribute types`_ instead of the ID, IDREF, and IDREFS +standard types, because it does not adhere to the `One ID per Element Type`_ +validity constraint. 
+ +__ `XML attribute types`_ + + +Custom attribute types +====================== + +The Docutils DTD defines `parameter entities`_ that resolve to standard +attribute types to highlight specific attribute value constraints. + +_`yesorno` + Boolean: no if zero ("0"), yes if any other value. + Resolves to ``NMTOKEN``. + + Used in the `anonymous`_ and `stub`_ attributes. + +_`number` + The attribute value must be a number. Resolves to ``NMTOKEN``. + + Used in the `level`_, `morecols`_, `scale`_, and `start`_ attributes. + +_`measure` + A number which may be immediately followed by a unit or percent sign. + Resolves to CDATA. + + Used in the `height`_ and `width`_ attributes. + +_`classnames.type` + A space-separated list of `class names` [#classname]_. Resolves to NMTOKEN. + + Used in the `classes`_ attribute. + +_`refname.type` + A normalized_ `reference name`_. Resolves to CDATA (in contrast to + NMTOKENS, `reference names`_ may consist of any text). + + Used in the `refname`_ attribute. + +_`refnames.type` + A space-separated list of `reference names`_. Resolves to CDATA. + + `Backslash escaping`_ is used for space characters inside a `reference + name`. + + Used in the `names`_ and `dupnames`_ attributes. + +_`ids.type` + A space-separated list of unique `identifier keys` [#identifier]_. + Resolves to NMTOKENS (the XML `standard attribute types`_ do not provide + for a list of IDs). + + Used in the `ids`_ attribute. + +_`idref.type` + A reference to an `identifier key`_. + Resolves to NMTOKEN (Docutils identifier keys do not use the ID standard + type as required by the `IDREF Validity constraint`_). + + Used in the `refid`_ attribute. + +_`idrefs.type` + A list of references to element identifiers. + Resolves to NMTOKENS. + + Used in the `backrefs`_ attribute. + +.. _`class names`: + +.. [#classname] `Class names` define sub-classes of existing elements. 
+ + In reStructuredText, custom `class names` can be specified using + the `"class" directive`_, a directive's `:class: option`_, or + `custom interpreted text roles`_. + Docutils normalizes them to conform to both, HTML4.1 and CSS1.0 `name` + requirements (the regular expression ``[a-z](-?[a-z0-9]+)*``) via the + `identifier normalization`_. + +.. _identifiers: +.. _identifier key: +.. _identifier keys: + +.. [#identifier] `Identifier keys` are used for cross references in + generated documents. Therefore, they must comply with restrictions in the + respective output formats (HTML4.1__, HTML5__, `polyglot HTML`__, + LaTeX__, ODT__, troff (manpage), XML__). + + Identifier keys cannot be specified directly in reStructuredText. + Docutils generates them by applying the `identifier normalization`_ to + `reference names`_ or from the auto_id_prefix_, prepending the id_prefix_ + and potentially appending numbers for disambiguation. + + __ https://www.w3.org/TR/html401/types.html#type-name + __ https://www.w3.org/TR/html50/dom.html#the-id-attribute + __ https://www.w3.org/TR/html-polyglot/#id-attribute + __ https://tex.stackexchange.com/questions/18311/what-are-the-valid-names-as-labels + __ https://help.libreoffice.org/6.3/en-US/text/swriter/01/04040000.html?DbPAR=WRITER#bm_id4974211 + __ https://www.w3.org/TR/REC-xml/#id + + +.. _XML 1.0 specification: https://www.w3.org/TR/REC-xml +.. _XML attribute types: https://www.w3.org/TR/REC-xml/#sec-attribute-types +.. _One ID per Element Type: https://www.w3.org/TR/REC-xml/#one-id-per-el +.. .. _ID attribute type: https://www.w3.org/TR/REC-xml/#id +.. _parameter entities: https://www.w3.org/TR/REC-xml/#dt-PE +.. _IDREF Validity constraint: https://www.w3.org/TR/REC-xml/#idref + +.. _reference names: +.. _reference name: rst/restructuredtext.html#reference-names +.. _backslash escaping: rst/restructuredtext.html#escaping-mechanism +.. _id_prefix: ../user/config.html#id-prefix +.. 
_auto_id_prefix: ../user/config.html#auto-id-prefix +.. _identifier normalization: + rst/directives.html#identifier-normalization +.. _`:class: option`: rst/directives.html#class-option +.. _custom interpreted text roles: + rst/directives.html#custom-interpreted-text-roles + + +--------------------- + Attribute Reference +--------------------- + +.. contents:: :local: + :depth: 1 + +_`Common Attributes`: + Through the `%basic.atts;`_ parameter entity, all elements support + the following attributes: ids_, names_ or dupnames_, source_, and + classes_. + +``align`` +========= + +Attribute type: `CDATA`_. Default value: none (inherit). + +The ``align`` attribute is used in the figure_, image_, and table_ elements +(via the `%align-h.att;`_ and `%align-hv.att;`_ parameter entities). + +``anonymous`` +============= + +Attribute type: `yesorno`_. Default value: none (implies no). + +The ``anonymous`` attribute is used for unnamed hyperlinks in the +target_ and reference_ elements (via the `%anonymous.att;`_ parameter +entity). + + +``auto`` +======== + +Attribute type: `CDATA`_. Default value: none. + +The ``auto`` attribute is used to indicate automatically-numbered +footnote_, footnote_reference_ and title_ elements (via the +`%auto.att;`_ parameter entity). + + +``backrefs`` +============ + +Attribute type: `idrefs.type`_. Default value: none. + +The ``backrefs`` attribute contains a space-separated list of identifier_ +references, used for backlinks from footnote_, citation_, and +system_message_ elements (via the `%backrefs.att;`_ parameter entity). + + +``bullet`` +========== + +Attribute type: `CDATA`_. Default value: none. + +The ``bullet`` attribute is used in the bullet_list_ element. + + +``classes`` +=========== + +Attribute type: `classnames.type`_. Default value: none. + +The ``classes`` attribute is a space separated list containing zero or more +`class names`_. 
+ +The purpose of the attribute is to indicate an "is-a" variant relationship, +to allow an extensible way of defining sub-classes of existing elements. It +can be used to carry context forward between a Docutils Reader and Writer, +when a custom structure is reduced to a standardized document tree. One +common use is in conjunction with stylesheets, to add selection criteria. +It should not be used to carry formatting instructions or arbitrary content. + +The ``classes`` attribute's contents should be ignorable. Writers that +are not familiar with the variant expressed should be able to ignore +the attribute. + +``classes`` is one of the `common attributes`_, shared by all Docutils +elements. + +.. _"class" directive: rst/directives.html#class + + +``colwidth`` +============ + +Attribute type: `CDATA`_. Default value: "1*" + +Column width specification used in the colspec_ element. +Defined in the exchange-table-model_. + +Either proportional measure of the form number*, e.g., “5*” for 5 times +the proportion, or “*” (which is equivalent to “1*”); fixed measure, +e.g., 2pt for 2 point, 3pi for 3 pica. + +The fixed unit values are case insensitive. The standard list of allowed +unit values is “pt” (points), “cm” (centimeters), “mm” (millimeters), +“pi” (picas), and “in” (inches). The default fixed unit should be +interpreted as “pt” if neither a proportion nor a fixed unit is +specified. + +.. important:: Currently, Docutils interprets unitless numbers as + proportions. + + +``delimiter`` +============= + +Attribute type: `CDATA`_. Default value: none. + +The ``delimiter`` attribute is used in the option_argument_ element. + + +``dupnames`` +============ + +Attribute type: `refnames.type`_. Default value: none. + +The ``dupnames`` attribute replaces the `names`_ attribute +when there has been a naming conflict. +``dupnames`` is one of the `common attributes`_, shared by all +Docutils elements. 
+ + +``enumtype`` +============ + +Attribute type: EnumeratedType_, one of "arabic", "loweralpha", +"upperalpha", "lowerroman", or "upperroman". Default value: none. + +The ``enumtype`` attribute is used in the enumerated_list_ element. + + +``height`` +========== + +Attribute type: measure_. Default value: none. + +The ``height`` attribute is used in the image_ element. + + +``ids`` +======= + +Attribute type: `ids.type`_. Default value: none. + +The ``ids`` attribute is a space separated list containing one or more +unique `identifier keys`_, typically assigned by the system. + +``ids`` is one of the `common attributes`_, shared by all Docutils +elements. + +.. TODO: + * Use 'id' for primary identifier key? + * Keep additional keys in `ids` + or in the preceding target elements? + +``level`` +========= + +Attribute type: number_. Default value: none. + +The ``level`` attribute is used in the system_message_ element. + +``morecols`` +============ + +Attribute type: number_. Default value: none. + +The ``morecols`` attribute is used in the table_ element. + +``names`` +========= + +Attribute type: `refnames.type`_. Default value: none. + +The ``names`` attribute is a space-separated list containing +`normalized`_ `reference names`_ of an element. Whitespace inside a +name is backslash escaped. +Each name in the list must be unique; if there are name conflicts +(two or more elements want the same name), the contents will be +transferred to the `dupnames`_ attribute on the duplicate elements. +An element may have at most one of the ``names`` or ``dupnames`` +attributes, but not both. + +`Reference names`_ are identifiers assigned in the markup. They +originate from `internal hyperlink targets`_, a directive's `name +option`_, or the element's title or content and are used for +internal cross-references (cf. refname_). + +``names`` is one of the `common attributes`_, shared by all +Docutils elements. + +..
_normalized: + rst/restructuredtext.html#normalized-reference-names +.. _internal hyperlink targets: + rst/restructuredtext.html#internal-hyperlink-targets +.. _name option: rst/directives.html#name + + +``prefix`` +========== + +Attribute type: `CDATA`_. Default value: none. + +The ``prefix`` attribute is used in the enumerated_list_ element. + + +``refid`` +========= + +Attribute type: `idref.type`_. Default value: none. + +The ``refid`` attribute contains a reference to an `identifier key`_. + +``refid`` is used by the target_, reference_, footnote_reference_, +citation_reference_, title_ and problematic_ elements (via the +`%refid.att;`_ and `%reference.atts;`_ parameter entities). + + +``refname`` +=========== + +Attribute type: `refname.type`_. Default value: none. + +The ``refname`` attribute contains a reference to one of the +`reference names`_ in the `names`_ attribute of another element. On +a `target`_ element, ``refname`` indicates an `indirect target`_ which +may resolve to either an internal or external reference. Docutils +"transforms_" replace the ``refname`` attribute with a refid_ pointing +to the same element. + +``refname`` is used by the target_, reference_, footnote_reference_, +citation_reference_, and substitution_reference_ elements (via the +`%refname.att;`_ and `%reference.atts;`_ parameter entities). + +.. _indirect target: rst/restructuredtext.html#indirect-hyperlink-targets +.. _transforms: ../api/transforms.html + + +``refuri`` +========== + +Attribute type: `CDATA`_. Default value: none. + +The ``refuri`` attribute contains an external reference to a URI/URL. +It is used by the target_, reference_, footnote_reference_, and +citation_reference_ elements (via the `%reference.atts;`_ parameter +entity). + + +``scale`` +========== + +Attribute type: number_. Default value: none. + +The ``scale`` attribute is used in the image_ element. + + +``source`` +========== + +Attribute type: `CDATA`_. Default value: none.
+ +The ``source`` attribute is used to store the path or URL to the +source text that was used to produce the document tree. It is one of +the `common attributes`_, declared for all Docutils elements. + + +``start`` +========= + +Attribute type: `number`_. Default value: none. + +The ``start`` attribute is used in the enumerated_list_ element. + + +``stub`` +========= + +Attribute type: `yesorno`_. Default value: none. + +The ``stub`` attribute is used in the colspec_ element. +It marks a table column containing "stubs" (row titles, on the left). +See also the csv-table_ and list-table_ directives. + +``suffix`` +========== + +Attribute type: `CDATA`_. Default value: none. + +The ``suffix`` attribute is used in the enumerated_list_ element. + + +.. _title attribute: + +``title`` +========= + +Attribute type: `CDATA`_. Default value: none. + +The ``title`` attribute stores the title metadata of a document_. This +title is typically not part of the rendered document. It may for +example be used in HTML's ``title`` element. + + +``width`` +========== + +Attribute type: measure_. Default value: none. + +The ``width`` attribute is used in the figure_, image_, and table_ element. + + +``xml:space`` +============= + +`Attribute type`: `EnumeratedType`_, one of "default" or "preserve". +Default value: "preserve" (fixed). + +The ``xml:space`` attribute is a standard XML attribute for +whitespace-preserving elements. It is used by the literal_block_, +line_block_, doctest_block_, comment_, and raw_ elements (via the +`%fixedspace.att;`_ parameter entity). It is a fixed attribute, meant +to communicate to an XML parser that the element contains significant +whitespace. The attribute value should not be set in a document +instance. + + +---------------------------- + Parameter Entity Reference +---------------------------- + +.. 
contents:: :local: + :depth: 1 + +Parameter entities are used to simplify the DTD (to share definitions +and reduce duplication) and to allow the DTD to be customized by +wrapper DTDs (external client DTDs that use or import the Docutils +DTD). Parameter entities may be overridden by wrapper DTDs, replacing +the definitions below with custom definitions. Parameter entities +whose names begin with "additional" are meant to allow easy extension +by wrapper DTDs. + +``%align-h.att;`` +================= + +The ``%align-h.att;`` parameter entity contains the align_ +attribute for horizontal alignment. + +Entity definition:: + + align (left | center | right) #IMPLIED + +The figure_ and table_ elements directly employ the +``%align-h.att;`` parameter entity in their attribute lists. + +``%align-hv.att;`` +================== + +The ``%align-hv.att;`` parameter entity contains the align_ +attribute for horizontal and vertical alignment. + +Entity definition:: + + align (top | middle | bottom | left | center | right) #IMPLIED + +The image_ element directly employs the ``%align-hv.att;`` parameter +entity in its attribute list. + +``%anonymous.att;`` +=================== + +The ``%anonymous.att;`` parameter entity contains the anonymous_ +attribute, used for unnamed hyperlinks. + +Entity definition:: + + anonymous %yesorno; #IMPLIED + +The reference_ and target_ elements directly employ the +``%anonymous.att;`` parameter entity in their attribute lists. + + +``%auto.att;`` +============== + +The ``%auto.att;`` parameter entity contains the auto_ attribute, used +to indicate an automatically-numbered footnote or title. + +Entity definition:: + + auto CDATA #IMPLIED + +The footnote_, footnote_reference_, and title_ elements directly +employ the ``%auto.att;`` parameter entity in their attribute lists. + + +``%backrefs.att;`` +================== + +The ``%backrefs.att;`` parameter entity contains the backrefs_ +attribute, a space-separated list of id references, for backlinks. 
+ +Entity definition:: + + backrefs_ %idrefs.type; #IMPLIED + +The citation_, footnote_, and system_message_ elements directly employ +the ``%backrefs.att;`` parameter entity in their attribute lists. + + +``%basic.atts;`` +================ + +The ``%basic.atts;`` parameter entity lists the `common attributes`_. + +Entity definition: + +.. parsed-literal:: + + ids_ NMTOKENS #IMPLIED + names_ CDATA #IMPLIED + dupnames_ CDATA #IMPLIED + source_ CDATA #IMPLIED + classes_ NMTOKENS #IMPLIED + %additional.basic.atts; + +The ``%additional.basic.atts;`` parameter entity can be used by +wrapper DTDs to extend ``%basic.atts;``. + + +``%bibliographic.elements;`` +============================ + +The ``%bibliographic.elements;`` parameter entity contains an OR-list of all +`bibliographic elements`_. + +Entity definition: + +.. parsed-literal:: + + author_ | authors_ | organization_ | contact_ | address_ + | version_ | revision_ | status_ | date_ | copyright_ + | field_ + %additional.bibliographic.elements; + +The ``%additional.bibliographic.elements;`` parameter entity can be used by +wrapper DTDs to extend ``%bibliographic.elements;``. + +Only the docinfo_ element directly employs the +``%bibliographic.elements;`` parameter entity in its content model. + + +``%body.elements;`` +=================== + +The ``%body.elements;`` parameter entity contains an OR-list of all +`body elements`_. ``%body.elements;`` is itself contained within the +`%structure.model;`_ parameter entity. + +Entity definition: + +.. 
parsed-literal:: + + admonition_ | attention_ | block_quote_ | bullet_list_ | caution_ + | citation_ | compound_ | comment_ | container_ | danger_ + | definition_list_ | doctest_block_ | enumerated_list_ | error_ + | field_list_ | figure_ | footnote_ | hint_ | image_ | important_ + | line_block_ | literal_block_ | note_ | option_list_ + | paragraph_ | pending_ | raw_ | reference_ | rubric_ + | substitution_definition_ | system_message_ | table_ | target_ + | tip_ | warning_ %additional.body.elements; + +The ``%additional.body.elements;`` parameter entity can be used by +wrapper DTDs to extend ``%body.elements;``. + +The ``%body.elements;`` parameter entity is directly employed in the +content models of the following elements: admonition_, attention_, +block_quote_, caution_, citation_, compound_, danger_, definition_, +description_, entry_, error_, field_body_, footer_, footnote_, +header_, hint_, important_, legend_, list_item_, note_, sidebar_, +system_message_, tip_, topic_, warning_ + +Via `%structure.model;`_, the ``%body.elements;`` parameter entity is +indirectly employed in the content models of the document_ and +section_ elements. + + +``%fixedspace.att;`` +==================== + +The ``%fixedspace.att;`` parameter entity contains the `xml:space`_ +attribute, a standard XML attribute for whitespace-preserving +elements. + +Entity definition: + +.. parsed-literal:: + + `xml:space`_ (default | preserve) #FIXED 'preserve' + +The ``%fixedspace.att;`` parameter entity is directly employed in the +attribute lists of the following elements: address_, comment_, +doctest_block_, line_block_, literal_block_, raw_ + + +``%inline.elements;`` +===================== + +The ``%inline.elements;`` parameter entity contains an OR-list of all +`inline elements`_. + +Entity definition: + +..
parsed-literal:: + + abbreviation_ | acronym_ | citation_reference_ | emphasis_ + | footnote_reference_ | generated_ | image_ | inline_ | literal_ + | problematic_ | raw_ | reference_ | strong_ | substitution_reference_ + | subscript_ | superscript_ | target_ | title_reference_ + %additional.inline.elements; + +The ``%additional.inline.elements;`` parameter entity can be used by +wrapper DTDs to extend ``%inline.elements;``. + +Via `%text.model;`_, the ``%inline.elements;`` parameter entity is +indirectly employed in the content models of the following elements: +abbreviation_, acronym_, address_, attribution_, author_, caption_, +classifier_, contact_, copyright_, date_, doctest_block_, emphasis_, +generated_, inline_, line_block_, literal_block_, math_, math_block_, +organization_, +paragraph_, problematic_, raw_, reference_, revision_, rubric_, +status_, strong_, subscript_, substitution_definition_, +substitution_reference_, subtitle_, superscript_, target_, term_, +title_, title_reference_, version_ + + +``%reference.atts;`` +==================== + +The ``%reference.atts;`` parameter entity groups together the refuri_, +refid_, and refname_ attributes. + +Entity definition: + +.. parsed-literal:: + + `%refuri.att;`_ + `%refid.att;`_ + `%refname.att;`_ + %additional.reference.atts; + +The ``%additional.reference.atts;`` parameter entity can be used by +wrapper DTDs to extend ``%reference.atts;``. + +The citation_reference_, footnote_reference_, reference_, and target_ +elements directly employ the ``%reference.atts;`` parameter entity in +their attribute lists. + + +``%refid.att;`` +================ + +The ``%refid.att;`` parameter entity contains the refid_ attribute, an +internal reference to the `ids`_ attribute of another element. + +Entity definition: + +.. parsed-literal:: + + refid_ %idref.type; #IMPLIED + +The title_ and problematic_ elements directly employ the +``%refid.att;`` parameter entity in their attribute lists.
+ +Via `%reference.atts;`_, the ``%refid.att;`` parameter entity is +indirectly employed in the attribute lists of the citation_reference_, +footnote_reference_, reference_, and target_ elements. + + +``%refname.att;`` +================= + +The ``%refname.att;`` parameter entity contains the refname_ +attribute, an internal reference to the `names`_ attribute of another +element. On a `target`_ element, ``refname`` indicates an indirect +target which may resolve to either an internal or external +reference. + +Entity definition: + +.. parsed-literal:: + + refname_ %refname.type; #IMPLIED + +The substitution_reference_ element directly employs the +``%refname.att;`` parameter entity in its attribute list. + +Via `%reference.atts;`_, the ``%refname.att;`` parameter entity is +indirectly employed in the attribute lists of the citation_reference_, +footnote_reference_, reference_, and target_ elements. + + +``%refuri.att;`` +================ + +The ``%refuri.att;`` parameter entity contains the refuri_ attribute, +an external reference to a URI/URL. + +Entity definition: + +.. parsed-literal:: + + refuri_ CDATA #IMPLIED + +Via `%reference.atts;`_, the ``%refuri.att;`` parameter entity is +indirectly employed in the attribute lists of the citation_reference_, +footnote_reference_, reference_, and target_ elements. + + +``%section.elements;`` +====================== + +The ``%section.elements;`` parameter entity contains an OR-list of all +section_-equivalent elements. ``%section.elements;`` is itself +contained within the `%structure.model;`_ parameter entity. + +Entity definition: + +.. parsed-literal:: + + section_ + %additional.section.elements; + +The ``%additional.section.elements;`` parameter entity can be used +by wrapper DTDs to extend ``%section.elements;``. + +Via `%structure.model;`_, the ``%section.elements;`` parameter entity +is indirectly employed in the content models of the document_ and +section_ elements. 
+ + +``%structure.model;`` +===================== + +The ``%structure.model;`` parameter entity encapsulates the +hierarchical structure of a document and of its constituent parts. +See the discussion of the `element hierarchy`_ above. + +Entity definition: + +.. parsed-literal:: + + ( ( (`%body.elements;`_ | topic_ | sidebar_)+, transition_? )*, + ( (`%section.elements;`_), (transition_?, (`%section.elements;`_) )* )? ) + +Each document_ or section_ contains zero or more body elements, +topics, and/or sidebars, optionally interspersed with single +transitions, followed by zero or more sections (whose contents are +recursively the same as this model) optionally interspersed with +transitions. + +The following restrictions are imposed by this model: + +* Transitions must be separated by other elements (body elements, + sections, etc.). In other words, a transition may not be + immediately adjacent to another transition. + +* A transition may not occur at the beginning of a document or + section. + +.. The following is not the case with Docutils (since at least 2004) + (cf. test/functional/input/data/standard.txt) + + An additional restriction, which cannot be expressed in the language + of DTDs, is imposed by software: + + * A transition may not occur at the end of a document or section. + +The `%structure.model;`_ parameter entity is directly employed in the +content models of the document_ and section_ elements. + + +``%text.model;`` +================ + +The ``%text.model;`` parameter entity is used by many elements to +represent text data mixed with `inline elements`_. + +Entity definition: + +.. 
parsed-literal:: + + (#PCDATA | `%inline.elements;`_)* + +The ``%text.model;`` parameter entity is directly employed in the +content models of the following elements: abbreviation_, acronym_, +address_, author_, caption_, classifier_, contact_, copyright_, date_, +doctest_block_, emphasis_, field_name_, generated_, line_block_, +literal_block_, organization_, paragraph_, problematic_, raw_, +reference_, revision_, status_, strong_, substitution_definition_, +substitution_reference_, subtitle_, target_, term_, title_, version_ + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/definitions.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/definitions.txt new file mode 100644 index 00000000..d2eff253 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/definitions.txt @@ -0,0 +1,180 @@ +============================================ + reStructuredText Standard Definition Files +============================================ +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + + +This document describes standard definition files, such as sets of +substitution definitions and interpreted text roles, that can be +included in reStructuredText documents. The `"include" directive`__ +has a special syntax for these standard definition files, angle +brackets around the file name:: + + .. include:: <filename.txt> + +__ directives.html#include + +The individual data files are stored with the Docutils source code in +the "docutils" package, in the ``docutils/parsers/rst/include`` +directory. 
+ + +Substitution Definitions +======================== + +Many of the standard definition files contain sets of `substitution +definitions`__, which can be used in documents via `substitution +references`__. For example, the copyright symbol is defined in +``isonum.txt`` as "copy":: + + .. include:: <isonum.txt> + + Copyright |copy| 2003 by John Q. Public, all rights reserved. + +__ restructuredtext.html#substitution-definitions +__ restructuredtext.html#substitution-references + +Individual substitution definitions can also be copied from definition +files and pasted into documents. This has two advantages: it removes +dependencies, and it saves processing of unused definitions. However, +multiple substitution definitions add clutter to the document. + +Substitution references require separation from the surrounding text +with whitespace or punctuation. To use a substitution without +intervening whitespace, you can use the disappearing-whitespace escape +sequence, backslash-space:: + + .. include:: <isonum.txt> + + Copyright |copy| 2003, BogusMegaCorp\ |trade|. + +Custom substitution definitions may use the `"unicode" directive`__. +Whitespace is ignored and removed, effectively squeezing together the +text:: + + .. |copy| unicode:: U+000A9 .. COPYRIGHT SIGN + .. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign + + Copyright |copy| 2003, |BogusMegaCorp (TM)|. + +__ directives.html#unicode + +In addition, the "ltrim", "rtrim", and "trim" options may be used with +the "unicode" directive to automatically trim spaces from the left, +right, or both sides (respectively) of substitution references:: + + .. |---| unicode:: U+02014 .. em dash + :trim: + + +Character Entity Sets +--------------------- + +The following files contain substitution definitions corresponding to +XML character entity sets, from the following standards: ISO 8879 & +ISO 9573-13 (combined), MathML, and XHTML1.
They were generated by +the ``tools/dev/unicode2rstsubs.py`` program from the input file +unicode.xml__, which is maintained as part of the MathML 2 +Recommendation XML source. + +__ https://www.w3.org/2003/entities/xml/ + +=================== ================================================= +Entity Set File Description +=================== ================================================= +isoamsa.txt_ Added Mathematical Symbols: Arrows +isoamsb.txt_ Added Mathematical Symbols: Binary Operators +isoamsc.txt_ Added Mathematical Symbols: Delimiters +isoamsn.txt_ Added Mathematical Symbols: Negated Relations +isoamso.txt_ Added Mathematical Symbols: Ordinary +isoamsr.txt_ Added Mathematical Symbols: Relations +isobox.txt_ Box and Line Drawing +isocyr1.txt_ Russian Cyrillic +isocyr2.txt_ Non-Russian Cyrillic +isodia.txt_ Diacritical Marks +isogrk1.txt_ Greek Letters +isogrk2.txt_ Monotoniko Greek +isogrk3.txt_ Greek Symbols +isogrk4.txt_ [1]_ Alternative Greek Symbols +isolat1.txt_ Added Latin 1 +isolat2.txt_ Added Latin 2 +isomfrk.txt_ [1]_ Mathematical Fraktur +isomopf.txt_ [1]_ Mathematical Openface (Double-struck) +isomscr.txt_ [1]_ Mathematical Script +isonum.txt_ Numeric and Special Graphic +isopub.txt_ Publishing +isotech.txt_ General Technical +mmlalias.txt_ MathML aliases for entities from other sets +mmlextra.txt_ [1]_ Extra names added by MathML +xhtml1-lat1.txt_ XHTML Latin 1 +xhtml1-special.txt_ XHTML Special Characters +xhtml1-symbol.txt_ XHTML Mathematical, Greek and Symbolic Characters +=================== ================================================= + +.. [1] There are ``*-wide.txt`` variants for each of these character + entity set files, containing characters outside of the Unicode + basic multilingual plane or BMP (wide-Unicode; code points greater + than U+FFFF). Most pre-built Python distributions are "narrow" and + do not support wide-Unicode characters.
Python *can* be built with + wide-Unicode support though; consult the Python build instructions + for details. + +For example, the copyright symbol is defined as the XML character +entity ``&copy;``. The equivalent reStructuredText substitution +reference (defined in both ``isonum.txt`` and ``xhtml1-lat1.txt``) is +``|copy|``. + +.. _isoamsa.txt: ../../../docutils/parsers/rst/include/isoamsa.txt +.. _isoamsb.txt: ../../../docutils/parsers/rst/include/isoamsb.txt +.. _isoamsc.txt: ../../../docutils/parsers/rst/include/isoamsc.txt +.. _isoamsn.txt: ../../../docutils/parsers/rst/include/isoamsn.txt +.. _isoamso.txt: ../../../docutils/parsers/rst/include/isoamso.txt +.. _isoamsr.txt: ../../../docutils/parsers/rst/include/isoamsr.txt +.. _isobox.txt: ../../../docutils/parsers/rst/include/isobox.txt +.. _isocyr1.txt: ../../../docutils/parsers/rst/include/isocyr1.txt +.. _isocyr2.txt: ../../../docutils/parsers/rst/include/isocyr2.txt +.. _isodia.txt: ../../../docutils/parsers/rst/include/isodia.txt +.. _isogrk1.txt: ../../../docutils/parsers/rst/include/isogrk1.txt +.. _isogrk2.txt: ../../../docutils/parsers/rst/include/isogrk2.txt +.. _isogrk3.txt: ../../../docutils/parsers/rst/include/isogrk3.txt +.. _isogrk4.txt: ../../../docutils/parsers/rst/include/isogrk4.txt +.. _isolat1.txt: ../../../docutils/parsers/rst/include/isolat1.txt +.. _isolat2.txt: ../../../docutils/parsers/rst/include/isolat2.txt +.. _isomfrk.txt: ../../../docutils/parsers/rst/include/isomfrk.txt +.. _isomopf.txt: ../../../docutils/parsers/rst/include/isomopf.txt +.. _isomscr.txt: ../../../docutils/parsers/rst/include/isomscr.txt +.. _isonum.txt: ../../../docutils/parsers/rst/include/isonum.txt +.. _isopub.txt: ../../../docutils/parsers/rst/include/isopub.txt +.. _isotech.txt: ../../../docutils/parsers/rst/include/isotech.txt +.. _mmlalias.txt: ../../../docutils/parsers/rst/include/mmlalias.txt +.. _mmlextra.txt: ../../../docutils/parsers/rst/include/mmlextra.txt +..
_xhtml1-lat1.txt: ../../../docutils/parsers/rst/include/xhtml1-lat1.txt +.. _xhtml1-special.txt: ../../../docutils/parsers/rst/include/xhtml1-special.txt +.. _xhtml1-symbol.txt: ../../../docutils/parsers/rst/include/xhtml1-symbol.txt + + +S5/HTML Definitions +=================== + +The "s5defs.txt_" standard definition file contains interpreted text +roles (classes) and other definitions for documents destined to become +`S5/HTML slide shows`_. + +.. _s5defs.txt: ../../../docutils/parsers/rst/include/s5defs.txt +.. _S5/HTML slide shows: ../../user/slide-shows.html + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/directives.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/directives.txt new file mode 100644 index 00000000..caa46cbe --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/directives.txt @@ -0,0 +1,2075 @@ +============================= + reStructuredText Directives +============================= +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + :depth: 2 + +This document describes the directives implemented in the reference +reStructuredText parser. + +Directives have the following syntax:: + + +-------+-------------------------------+ + | ".. " | directive type "::" directive | + +-------+ block | + | | + +-------------------------------+ + +Directives begin with an explicit markup start (two periods and a +space), followed by the directive type and two colons (collectively, +the "directive marker"). The directive block begins immediately after +the directive marker, and includes all subsequent indented lines. 
The +directive block is divided into arguments, options (a field list), and +content (in that order), any of which may appear. See the Directives_ +section in the `reStructuredText Markup Specification`_ for syntax +details. + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to individual +directives. For details on the hierarchy of elements, please see `The +Docutils Document Tree`_ and the `Docutils Generic DTD`_ XML document +type definition. For directive implementation details, see `Creating +reStructuredText Directives`_. + +.. _Directives: restructuredtext.html#directives +.. _reStructuredText Markup Specification: restructuredtext.html +.. _The Docutils Document Tree: ../doctree.html +.. _Docutils Generic DTD: ../docutils.dtd +.. _Creating reStructuredText Directives: + ../../howto/rst-directives.html + + +------------- + Admonitions +------------- + +.. From Webster's Revised Unabridged Dictionary (1913) [web1913]: + Admonition + Gentle or friendly reproof; counseling against a fault or + error; expression of authoritative advice; friendly caution + or warning. + + Syn: {Admonition}, {Reprehension}, {Reproof}. + + Usage: Admonition is prospective, and relates to moral delinquencies; + its object is to prevent further transgression. + +.. _attention: +.. _caution: +.. _danger: +.. _error: +.. _hint: +.. _important: +.. _note: +.. _tip: +.. _warning: + +Specific Admonitions +==================== + +:Directive Types: "attention", "caution", "danger", "error", "hint", + "important", "note", "tip", "warning", "admonition" +:Doctree Elements: attention, caution, danger, error, hint, important, + note, tip, warning, admonition_, title_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Content: Interpreted as body elements. + +Admonitions are specially marked "topics" that can appear anywhere an +ordinary body element can. They contain arbitrary body elements. 
+Typically, an admonition is rendered as an offset block in a document, +sometimes outlined or shaded, with a title matching the admonition +type. For example:: + + .. DANGER:: + Beware killer rabbits! + +This directive might be rendered something like this:: + + +------------------------+ + | !DANGER! | + | | + | Beware killer rabbits! | + +------------------------+ + +The following admonition directives have been implemented: + +- attention +- caution +- danger +- error +- hint +- important +- note +- tip +- warning + +Any text immediately following the directive indicator (on the same +line and/or indented on following lines) is interpreted as a directive +block and is parsed for normal body elements. For example, the +following "note" admonition directive contains one paragraph and a +bullet list consisting of two list items:: + + .. note:: This is a note admonition. + This is the second line of the first paragraph. + + - The note contains all indented body elements + following. + - It includes this bullet list. + + +Generic Admonition +================== + +:Directive Type: "admonition" +:Doctree Elements: admonition_, title_ +:Directive Arguments: One, required (admonition title) +:Directive Options: class_, name_ +:Directive Content: Interpreted as body elements. + +This is a generic, titled admonition. The title may be anything the +author desires. + +The author-supplied title is also used as a `"classes"`_ attribute value +after being converted into a valid identifier form (down-cased; +non-alphanumeric characters converted to single hyphens; "admonition-" +prefixed). For example, this admonition:: + + .. admonition:: And, by the way... + + You can make up your own admonition too. + +becomes the following document tree (pseudo-XML):: + + <document source="test data"> + <admonition classes="admonition-and-by-the-way"> + <title> + And, by the way... + <paragraph> + You can make up your own admonition too. 
+ +The class_ option overrides the computed `"classes"`_ attribute +value. + + +-------- + Images +-------- + +There are two image directives: "image" and "figure". + +.. attention:: + + It is up to the author to ensure compatibility of the image data format + with the output format or user agent (LaTeX engine, `HTML browser`__). + The following, non exhaustive table provides an overview: + + =========== ====== ====== ===== ===== ===== ===== ===== ===== ===== ===== + .. vector image raster image moving image [#]_ + ----------- ------------- ----------------------------- ----------------- + .. SVG PDF PNG JPG GIF APNG AVIF WebM MP4 OGG + =========== ====== ====== ===== ===== ===== ===== ===== ===== ===== ===== + HTML4_ ✓ [#]_ ✓ ✓ ✓ (✓) (✓) + + HTML5_ ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ ✓ + + LaTeX_ [#]_ ✓ ✓ ✓ + + ODT_ ✓ ✓ ✓ ✓ ✓ + =========== ====== ====== ===== ===== ===== ===== ===== ===== ===== ===== + + .. [#] The `html5 writer`_ uses the ``<video>`` tag if the image URI + ends with an extension matching one of the listed video formats + (since Docutils 0.17). + + .. [#] The html4 writer uses an ``<object>`` tag for SVG images + for better compatibility with older browsers. + + .. [#] When compiling with ``pdflatex``, ``xelatex``, or ``lualatex``. + The original ``latex`` engine supports only the EPS image format. + Some build systems, e.g. rubber_ support additional formats + via on-the-fly image conversion. + +__ https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Image_types +.. _HTML4: +.. _html4 writer: ../../user/html.html#html4css1 +.. _HTML5: +.. _html5 writer: ../../user/html.html#html5-polyglot +.. _LaTeX: ../../user/latex.html#image-inclusion +.. _ODT: ../../user/odt.html +.. _rubber: https://github.com/petrhosek/rubber + + +Image +===== + +:Directive Type: "image" +:Doctree Element: image_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible (see below). +:Directive Content: None. + +An "image" is a simple picture:: + + .. 
image:: picture.png + +Inline images can be defined with an "image" directive in a `substitution +definition`_. + +The URI for the image source file is specified in the directive +argument. As with hyperlink targets, the image URI may begin on the +same line as the explicit markup start and target name, or it may +begin in an indented text block immediately following, with no +intervening blank lines. If there are multiple lines in the link +block, they are stripped of leading and trailing whitespace and joined +together. + +Optionally, the image link block may contain a flat field list, the +_`image options`. For example:: + + .. image:: picture.jpeg + :height: 100px + :width: 200 px + :scale: 50 % + :alt: alternate text + :align: right + +The following options are recognized: + +``alt`` : text + Alternate text: a short description of the image, displayed by + applications that cannot display images, or spoken by applications + for visually impaired users. + +``height`` : `length`_ + The desired height of the image. + Used to reserve space or scale the image vertically. When the "scale" + option is also specified, they are combined. For example, a height of + 200px and a scale of 50 is equivalent to a height of 100px with no scale. + +``width`` : `length`_ or `percentage`_ of the current line width + The width of the image. + Used to reserve space or scale the image horizontally. As with "height" + above, when the "scale" option is also specified, they are combined. + +``scale`` : integer percentage (the "%" symbol is optional) + The uniform scaling factor of the image. The default is "100 %", i.e. + no scaling. + + If no "height" or "width" options are specified, the `Python + Imaging Library`_ (PIL/Pillow_) may be used to determine them, if + it is installed and the image file is available.
+ +``align`` : "top", "middle", "bottom", "left", "center", or "right" + The alignment of the image, equivalent to the HTML ``<img>`` tag's + deprecated "align" attribute or the corresponding "vertical-align" and + "text-align" CSS properties. + The values "top", "middle", and "bottom" + control an image's vertical alignment (relative to the text + baseline); they are only useful for inline images (substitutions). + The values "left", "center", and "right" control an image's + horizontal alignment, allowing the image to float and have the + text flow around it. The specific behavior depends upon the + browser or rendering software used. + +``target`` : text (URI or reference name) + Makes the image into a hyperlink reference ("clickable"). The + option argument may be a URI (relative or absolute), or a + `reference name`_ with underscore suffix (e.g. ```a name`_``). + +and the common options class_ and name_. + + +Figure +====== + +:Directive Type: "figure" +:Doctree Elements: figure_, image_, caption_, legend_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible (see below). +:Directive Content: Interpreted as the figure caption and an optional + legend. + +A "figure" consists of image_ data (including `image options`_), an optional +caption (a single paragraph), and an optional legend (arbitrary body +elements). For page-based output media, figures might float to a different +position if this helps the page layout. +:: + + .. figure:: picture.png + :scale: 50 % + :alt: map to buried treasure + + This is the caption of the figure (a simple paragraph). + + The legend consists of all elements after the caption. In this + case, the legend consists of this paragraph and the following + table: + + +-----------------------+-----------------------+ + | Symbol | Meaning | + +=======================+=======================+ + | .. image:: tent.png | Campground | + +-----------------------+-----------------------+ + | .. 
image:: waves.png | Lake | + +-----------------------+-----------------------+ + | .. image:: peak.png | Mountain | + +-----------------------+-----------------------+ + +There must be blank lines before the caption paragraph and before the +legend. To specify a legend without a caption, use an empty comment +("..") in place of the caption. + +The "figure" directive supports all of the options of the "image" +directive (see `image options`_ above). These options (except +"align") are passed on to the contained image. + +``align`` : "left", "center", or "right" + The horizontal alignment of the figure, allowing the image to + float and have the text flow around it. The specific behavior + depends upon the browser or rendering software used. + +In addition, the following options are recognized: + +``figwidth`` : "image", length_, or percentage_ of current line width + The width of the figure. + Limits the horizontal space used by the figure. + A special value of "image" is allowed, in which case the + included image's actual width is used (requires the `Python Imaging + Library`_). If the image file is not found or the required software is + unavailable, this option is ignored. + + Sets the "width" attribute of the "figure" doctree element. + + This option does not scale the included image; use the "width" + `image`_ option for that. :: + + +---------------------------+ + | figure | + | | + |<------ figwidth --------->| + | | + | +---------------------+ | + | | image | | + | | | | + | |<--- width --------->| | + | +---------------------+ | + | | + |The figure's caption should| + |wrap at this width. | + +---------------------------+ + +``figclass`` : text + Set a `"classes"`_ attribute value on the figure element. See the + class_ directive below. + +.. _Python Imaging Library: +.. 
_Pillow: https://pypi.org/project/Pillow/ + + +--------------- + Body Elements +--------------- + +Topic +===== + +:Directive Type: "topic" +:Doctree Element: topic_ +:Directive Arguments: One, required (topic title). +:Directive Options: class_, name_ +:Directive Content: Interpreted as the topic body. + +A topic is like a block quote with a title, or a self-contained +section with no subsections. Use the "topic" directive to indicate a +self-contained idea that is separate from the flow of the document. +Topics may occur anywhere a section or transition may occur. Body +elements and topics may not contain nested topics. + +The directive's sole argument is interpreted as the topic title; the +next line must be blank. All subsequent lines make up the topic body, +interpreted as body elements. For example:: + + .. topic:: Topic Title + + Subsequent indented lines comprise + the body of the topic, and are + interpreted as body elements. + + +Sidebar +======= + +:Directive Type: "sidebar" +:Doctree Element: sidebar_ +:Directive Arguments: One, optional (sidebar title). +:Directive Options: Possible (see below). +:Directive Content: Interpreted as the sidebar body. + +Sidebars are like miniature, parallel documents that occur inside +other documents, providing related or reference material. A sidebar +is typically offset by a border and "floats" to the side of the page; +the document's main text may flow around it. Sidebars can also be +likened to super-footnotes; their content is outside of the flow of +the document's main text. + +Sidebars may occur anywhere a section or transition may occur. Body +elements (including sidebars) may not contain nested sidebars. + +The directive's sole argument is interpreted as the sidebar title, +which may be followed by a subtitle option (see below); the next line +must be blank. All subsequent lines make up the sidebar body, +interpreted as body elements. For example:: + + .. 
sidebar:: Optional Sidebar Title + :subtitle: Optional Sidebar Subtitle + + Subsequent indented lines comprise + the body of the sidebar, and are + interpreted as body elements. + +The following options are recognized: + +``subtitle`` : text + The sidebar's subtitle. + +and the common options class_ and name_. + + +Line Block +========== + +.. admonition:: Deprecated + + The "line-block" directive is deprecated. Use the `line block + syntax`_ instead. + + .. _line block syntax: restructuredtext.html#line-blocks + +:Directive Type: "line-block" +:Doctree Element: line_block_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Content: Becomes the body of the line block. + +The "line-block" directive constructs an element where line breaks and +initial indentation are significant and inline markup is supported. It +is equivalent to a `parsed literal block`_ with different rendering: +typically in an ordinary serif typeface instead of a +typewriter/monospaced face, and not automatically indented. (Have the +line-block directive begin a block quote to get an indented line +block.) Line blocks are useful for address blocks and verse (poetry, +song lyrics), where the structure of lines is significant. For +example, here's a classic:: + + "To Ma Own Beloved Lassie: A Poem on her 17th Birthday", by + Ewan McTeagle (for Lassie O'Shea): + + .. line-block:: + + Lend us a couple of bob till Thursday. + I'm absolutely skint. + But I'm expecting a postal order and I can pay you back + as soon as it comes. + Love, Ewan. + + + +.. _parsed-literal: + +Parsed Literal Block +==================== + +:Directive Type: "parsed-literal" +:Doctree Element: literal_block_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Content: Becomes the body of the literal block. + +Unlike an ordinary literal block, the "parsed-literal" directive +constructs a literal block where the text is parsed for inline markup.
+It is equivalent to a `line block`_ with different rendering: +typically in a typewriter/monospaced typeface, like an ordinary +literal block. Parsed literal blocks are useful for adding hyperlinks +to code examples. + +However, care must be taken with the text, because inline markup is +recognized and there is no protection from parsing. Backslash-escapes +may be necessary to prevent unintended parsing. And because the +markup characters are removed by the parser, care must also be taken +with vertical alignment. Parsed "ASCII art" is tricky, and extra +whitespace may be necessary. + +For example, all the element names in this content model are links:: + + .. parsed-literal:: + + ( (title_, subtitle_?)?, + decoration_?, + (docinfo_, transition_?)?, + `%structure.model;`_ ) + +Code +==== + +:Directive Type: "code" +:Doctree Element: literal_block_, `inline elements`_ +:Directive Arguments: One, optional (formal language). +:Directive Options: name, class, number-lines. +:Directive Content: Becomes the body of the literal block. +:Configuration Setting: syntax_highlight_. + +The "code" directive constructs a literal block. If the code language is +specified, the content is parsed by the Pygments_ syntax highlighter and +tokens are stored in nested `inline elements`_ with class arguments +according to their syntactic category. The actual highlighting requires +a style-sheet (e.g. one `generated by Pygments`__, see the +`sandbox/stylesheets`__ for examples). + +The parsing can be turned off with the syntax_highlight_ configuration +setting and command line option or by specifying the language as class_ +option instead of directive argument. This also avoids warnings +when Pygments_ is not installed or the language is not in the +`supported languages and markup formats`_. + +For inline code, use the `"code" role`_. + +__ https://pygments.org/docs/cmdline/#generating-styles +__ https://docutils.sourceforge.io/sandbox/stylesheets/ +.. 
_Pygments: https://pygments.org/ +.. _syntax_highlight: ../../user/config.html#syntax-highlight +.. _supported languages and markup formats: https://pygments.org/languages/ +.. _"code" role: roles.html#code + + +The following options are recognized: + +``number-lines`` : [integer] (start line number) + Precede every line with a line number. + The optional argument is the number of the first line (default 1). + +and the common options class_ and name_. + +Example:: + The content of the following directive :: + + .. code:: python + + def my_function(): + "just a test" + print 8/2 + + is parsed and marked up as Python source code. + + +Math +==== + +:Directive Type: "math" +:Doctree Element: math_block_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Content: Becomes the body of the math block. + (Content blocks separated by a blank line are put in + adjacent math blocks.) +:Configuration Setting: math_output_ + +The "math" directive inserts blocks with mathematical content +(display formulas, equations) into the document. The input format is +`LaTeX math syntax`_ with support for Unicode symbols, for example:: + + .. math:: + + α_t(i) = P(O_1, O_2, … O_t, q_t = S_i λ) + +Support is limited to a subset of *LaTeX math* by the conversion +required for many output formats. For HTML, the `math_output`_ +configuration setting (or the corresponding ``--math-output`` +command line option) selects between alternative output formats with +different subsets of supported elements. If a writer does not +support math typesetting, the content is inserted verbatim. + +For inline formulas, use the `"math" role`_. + +.. _LaTeX math syntax: ../../ref/rst/mathematics.html +.. _"math" role: roles.html#math +.. _math_output: ../../user/config.html#math-output + + +Rubric +====== + +:Directive Type: "rubric" +:Doctree Element: rubric_ +:Directive Arguments: One, required (rubric text). +:Directive Options: class_, name_ +:Directive Content: None. + +..
+ + rubric n. 1. a title, heading, or the like, in a manuscript, + book, statute, etc., written or printed in red or otherwise + distinguished from the rest of the text. ... + + -- Random House Webster's College Dictionary, 1991 + +The "rubric" directive inserts a "rubric" element into the document +tree. A rubric is like an informal heading that doesn't correspond to +the document's structure. + + +Epigraph +======== + +:Directive Type: "epigraph" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +An epigraph is an apposite (suitable, apt, or pertinent) short +inscription, often a quotation or poem, at the beginning of a document +or section. + +The "epigraph" directive produces an "epigraph"-class block quote. +For example, this input:: + + .. epigraph:: + + No matter where you go, there you are. + + -- Buckaroo Banzai + +becomes this document tree fragment:: + + <block_quote classes="epigraph"> + <paragraph> + No matter where you go, there you are. + <attribution> + Buckaroo Banzai + + +Highlights +========== + +:Directive Type: "highlights" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +Highlights summarize the main points of a document or section, often +consisting of a list. + +The "highlights" directive produces a "highlights"-class block quote. +See Epigraph_ above for an analogous example. + + +Pull-Quote +========== + +:Directive Type: "pull-quote" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +A pull-quote is a small selection of text "pulled out and quoted", +typically in a larger typeface. Pull-quotes are used to attract +attention, especially in long articles. + +The "pull-quote" directive produces a "pull-quote"-class block quote. 
+See Epigraph_ above for an analogous example. + + +Compound Paragraph +================== + +:Directive Type: "compound" +:Doctree Element: compound_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Content: Interpreted as body elements. + +The "compound" directive is used to create a compound paragraph, which +is a single logical paragraph containing multiple physical body +elements such as simple paragraphs, literal blocks, tables, lists, +etc., instead of directly containing text and inline elements. For +example:: + + .. compound:: + + The 'rm' command is very dangerous. If you are logged + in as root and enter :: + + cd / + rm -rf * + + you will erase the entire contents of your file system. + +In the example above, a literal block is "embedded" within a sentence +that begins in one physical paragraph and ends in another. + +.. note:: + + The "compound" directive is *not* a generic block-level container + like HTML's ``<div>`` element. Do not use it only to group a + sequence of elements, or you may get unexpected results. + + If you need a generic block-level container, please use the + container_ directive, described below. + +Compound paragraphs are typically rendered as multiple distinct text +blocks, with the possibility of variations to emphasize their logical +unity: + +* If paragraphs are rendered with a first-line indent, only the first + physical paragraph of a compound paragraph should have that indent + -- second and further physical paragraphs should omit the indents; +* vertical spacing between physical elements may be reduced; +* and so on. + + +Container +========= + +:Directive Type: "container" +:Doctree Element: `container <container element_>`__ +:Directive Arguments: One or more, optional (class names). +:Directive Options: name_ +:Directive Content: Interpreted as body elements. + +The "container" directive surrounds its contents (arbitrary body +elements) with a generic block-level "container" element. 
Combined +with the optional "classes_" attribute argument(s), this is an +extension mechanism for users & applications. For example:: + + .. container:: custom + + This paragraph might be rendered in a custom way. + +Parsing the above results in the following pseudo-XML:: + + <container classes="custom"> + <paragraph> + This paragraph might be rendered in a custom way. + +The "container" directive is the equivalent of HTML's ``<div>`` +element. It may be used to group a sequence of elements for user- or +application-specific purposes. + + + +-------- + Tables +-------- + +Formal tables need more structure than the reStructuredText syntax +supplies. Tables may be given titles with the table_ directive. +Sometimes reStructuredText tables are inconvenient to write, or table +data in a standard format is readily available. The csv-table_ +directive supports CSV data. + + +Table +===== + +:Directive Type: "table" +:Doctree Element: table_ +:Directive Arguments: One, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A normal `reStructuredText table`_. + +The "table" directive is used to associate a +title with a table or specify options, e.g.:: + + .. table:: Truth table for "not" + :widths: auto + + ===== ===== + A not A + ===== ===== + False True + True False + ===== ===== + +The following options are recognized: + +``align`` : "left", "center", or "right" + The horizontal alignment of the table (new in Docutils 0.13). + +``width`` : `length`_ or `percentage`_ + Sets the width of the table to the specified length or percentage + of the line width. If omitted, the renderer determines the width + of the table based on its contents or the column ``widths``. + + .. _column-widths: + +``widths`` : "auto", "grid", or a list of integers + Explicitly set column widths. + Specifies relative widths if used with the ``width`` option. + Overrides a `table_style`_ setting or class value "colwidths-auto". 
+ The default depends on the `table_style`_ configuration setting. + + *"auto"* delegates the determination of column widths to the backend + (LaTeX, the HTML browser, ...). + Default for the `html5 writer`_. + + *"grid"* determines column widths from the widths of the input columns + (in characters). + Default for most writers. + + A *list of integers* is used instead of the input column widths. + Implies *"grid"*. + +Plus the common options class_ and name_. + +.. _reStructuredText table: restructuredtext.html#tables +.. _table_style: ../../user/config.html#table-style + +.. _csv-table: + +CSV Table +========= + +:Directive Type: "csv-table" +:Doctree Element: table_ +:Directive Arguments: One, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A CSV (comma-separated values) table. + +.. WARNING:: + + The "csv-table" directive's ":file:" and ":url:" options represent + potential security holes. They can be disabled with the + "file_insertion_enabled_" runtime setting. + +The "csv-table" directive is used to create a table from CSV +(comma-separated values) data. CSV is a common data format generated +by spreadsheet applications and commercial databases. The data may be +internal (an integral part of the document) or external (a separate +file). + +* Block markup and inline markup within cells are supported. Line ends + are recognized within cells. + +* There is no support for checking that the number of columns in each + row is the same. The directive automatically adds empty entries at + the end of short rows. + + .. Add "strict" option to verify input? + +Example:: + + .. csv-table:: Frozen Delights! + :header: "Treat", "Quantity", "Description" + :widths: 15, 10, 30 + + "Albatross", 2.99, "On a stick!" + "Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be + crunchy, now would it?" + "Gannet Ripple", 1.99, "On a stick!"
+ +The following options are recognized: + +``align`` : "left", "center", or "right" + The horizontal alignment of the table. (New in Docutils 0.13) + +``delim`` : char | "tab" | "space" [#whitespace-delim]_ + A one-character string\ [#ASCII-char]_ used to separate fields. + Defaults to ``,`` (comma). May be specified as a Unicode code + point; see the unicode_ directive for syntax details. + +``encoding`` : string + The text encoding of the external CSV data (file or URL). + Defaults to the document's input_encoding_. + +``escape`` : char + A one-character\ [#ASCII-char]_ string used to escape the + delimiter or quote characters. May be specified as a Unicode + code point; see the unicode_ directive for syntax details. Used + when the delimiter is used in an unquoted field, or when quote + characters are used within a field. The default is to double-up + the character, e.g. "He said, ""Hi!""" + + .. Add another possible value, "double", to explicitly indicate + the default case? + +``file`` : string (newlines removed) + The local filesystem path to a CSV data file. + +``header`` : CSV data + Supplemental data for the table header, added independently of and + before any ``header-rows`` from the main CSV data. Must use the + same CSV format as the main CSV data. + +``header-rows`` : integer + The number of rows of CSV data to use in the table header. + Defaults to 0. + +``keepspace`` : flag (empty) + Treat whitespace immediately following the delimiter as + significant. The default is to ignore such whitespace. + +``quote`` : char + A one-character string\ [#ASCII-char]_ used to quote elements + containing the delimiter or which start with the quote + character. Defaults to ``"`` (quote). May be specified as a + Unicode code point; see the unicode_ directive for syntax + details. + +``stub-columns`` : integer + The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. 
+ +``url`` : string (whitespace removed) + An Internet URL reference to a CSV data file. + +``widths`` : integer [integer...] or "auto" + A list of relative column widths. + The default is equal-width columns (100%/#columns). + + "auto" delegates the determination of column widths to the backend + (LaTeX, the HTML browser, ...). + +``width`` : `length`_ or `percentage`_ + Sets the width of the table to the specified length or percentage + of the line width. If omitted, the renderer determines the width + of the table based on its contents or the column ``widths``. + +and the common options class_ and name_. + +.. [#whitespace-delim] Whitespace delimiters are supported only for external + CSV files. + +.. [#ASCII-char] With Python 2, the values for the ``delimiter``, + ``quote``, and ``escape`` options must be ASCII characters. (The csv + module does not support Unicode and all non-ASCII characters are + encoded as multi-byte utf-8 string). This limitation does not exist + under Python 3. + + +List Table +========== + +:Directive Type: "list-table" +:Doctree Element: table_ +:Directive Arguments: One, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A uniform two-level bullet list. + +(This is an initial implementation; `further ideas`__ may be implemented +in the future.) + +__ ../../dev/rst/alternatives.html#list-driven-tables + +The "list-table" directive is used to create a table from data in a +uniform two-level bullet list. "Uniform" means that each sublist +(second-level list) must contain the same number of list items. + +Example:: + + .. list-table:: Frozen Delights! + :widths: 15 10 30 + :header-rows: 1 + + * - Treat + - Quantity + - Description + * - Albatross + - 2.99 + - On a stick! + * - Crunchy Frog + - 1.49 + - If we took the bones out, it wouldn't be + crunchy, now would it? + * - Gannet Ripple + - 1.99 + - On a stick! 
+ +The following options are recognized: + + +``align`` : "left", "center", or "right" + The horizontal alignment of the table. + (New in Docutils 0.13) + +``header-rows`` : integer + The number of rows of list data to use in the table header. + Defaults to 0. + +``stub-columns`` : integer + The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. + + .. _table width: + +``width`` : `length`_ or `percentage`_ + Sets the width of the table to the specified length or percentage + of the line width. If omitted, the renderer determines the width + of the table based on its contents or the column ``widths``. + + .. _column widths: + +``widths`` : integer [integer...] or "auto" + A list of relative column widths. + The default is equal-width columns (100%/#columns). + + "auto" delegates the determination of column widths to the backend + (LaTeX, the HTML browser, ...). + +and the common options class_ and name_. + + +---------------- + Document Parts +---------------- + +.. _contents: + +Table of Contents +================= + +:Directive Type: "contents" +:Doctree Elements: pending_, topic_ +:Directive Arguments: One, optional: title. +:Directive Options: Possible (see below). +:Directive Content: None. + +The "contents" directive generates a table of contents (TOC) in a +topic_. Topics, and therefore tables of contents, may occur anywhere +a section or transition may occur. Body elements and topics may not +contain tables of contents. + +Here's the directive in its simplest form:: + + .. contents:: + +Language-dependent boilerplate text will be used for the title. The +English default title text is "Contents". + +An explicit title may be specified:: + + .. contents:: Table of Contents + +The title may span lines, although it is not recommended:: + + .. contents:: Here's a very long Table of + Contents title + +Options may be specified for the directive, using a field list:: + + .. 
contents:: Table of Contents + :depth: 2 + +If the default title is to be used, the options field list may begin +on the same line as the directive marker:: + + .. contents:: :depth: 2 + +The following options are recognized: + +``depth`` : integer + The number of section levels that are collected in the table of + contents. The default is unlimited depth. + +``local`` : flag (empty) + Generate a local table of contents. Entries will only include + subsections of the section in which the directive is given. If no + explicit title is given, the table of contents will not be titled. + +``backlinks`` : "entry" or "top" or "none" + Generate links from section headers back to the table of contents + entries, the table of contents itself, or generate no back-links. + +``class`` : text + Set a `"classes"`_ attribute value on the topic element. See the + class_ directive below. + + +.. _sectnum: +.. _section-numbering: + +Automatic Section Numbering +=========================== + +:Directive Type: "sectnum" or "section-numbering" (synonyms) +:Doctree Elements: pending_, generated_ +:Directive Arguments: None. +:Directive Options: Possible (see below). +:Directive Content: None. +:Configuration Setting: sectnum_xform_ + +The "sectnum" (or "section-numbering") directive automatically numbers +sections and subsections in a document (if not disabled by the +``--no-section-numbering`` command line option or the `sectnum_xform`_ +configuration setting). + +Section numbers are of the "multiple enumeration" form, where each +level has a number, separated by periods. For example, the title of section +1, subsection 2, subsubsection 3 would have "1.2.3" prefixed. + +The "sectnum" directive does its work in two passes: the initial parse +and a transform. During the initial parse, a "pending" element is +generated which acts as a placeholder, storing any options internally. 
+At a later stage in the processing, the "pending" element triggers a +transform, which adds section numbers to titles. Section numbers are +enclosed in a "generated" element, and titles have their "auto" +attribute set to "1". + +The following options are recognized: + +``depth`` : integer + The number of section levels that are numbered by this directive. + The default is unlimited depth. + +``prefix`` : string + An arbitrary string that is prefixed to the automatically + generated section numbers. It may be something like "3.2.", which + will produce "3.2.1", "3.2.2", "3.2.2.1", and so on. Note that + any separating punctuation (in the example, a period, ".") must be + explicitly provided. The default is no prefix. + +``suffix`` : string + An arbitrary string that is appended to the automatically + generated section numbers. The default is no suffix. + +``start`` : integer + The value that will be used for the first section number. + Combined with ``prefix``, this may be used to force the right + numbering for a document split over several source files. The + default is 1. + +.. _sectnum_xform: ../../user/config.html#sectnum-xform + + +.. _header: +.. _footer: + +Document Header & Footer +======================== + +:Directive Types: "header" and "footer" +:Doctree Elements: decoration_, header, footer +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as body elements. + +The "header" and "footer" directives create document decorations, +useful for page navigation, notes, time/datestamp, etc. For example:: + + .. header:: This space for rent. + +This will add a paragraph to the document header, which will appear at +the top of the generated web page or at the top of every printed page. + +These directives may be used multiple times, cumulatively. There is +currently support for only one header and footer. + +.. 
note:: + + While it is possible to use the "header" and "footer" directives to + create navigational elements for web pages, you should be aware + that Docutils is meant to be used for *document* processing, and + that a navigation bar is not typically part of a document. + + Thus, you may soon find Docutils' abilities to be insufficient for + these purposes. At that time, you should consider using a + documentation generator like Sphinx_ rather than the "header" and + "footer" directives. + + .. _Sphinx: http://sphinx-doc.org/ + +In addition to the use of these directives to populate header and +footer content, content may also be added automatically by the +processing system. For example, if certain runtime settings are +enabled, the document footer is populated with processing information +such as a datestamp, a link to `the Docutils website`_, etc. + +.. _the Docutils website: https://docutils.sourceforge.io + + +------------ + References +------------ + +.. _target-notes: + +Target Footnotes +================ + +:Directive Type: "target-notes" +:Doctree Elements: pending_, footnote_, footnote_reference_ +:Directive Arguments: None. +:Directive Options: class_, name_ +:Directive Options: Possible (see below). +:Directive Content: None. + +The "target-notes" directive creates a footnote for each external +target in the text, and corresponding footnote references after each +reference. For every explicit target (of the form, ``.. _target name: +URL``) in the text, a footnote will be generated containing the +visible URL as content. + + +Footnotes +========= + +**NOT IMPLEMENTED YET** + +:Directive Type: "footnotes" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. + +@@@ + + +Citations +========= + +**NOT IMPLEMENTED YET** + +:Directive Type: "citations" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. 
+ +@@@ + + +--------------- + HTML-Specific +--------------- + +Imagemap +======== + +**NOT IMPLEMENTED YET** + +Non-standard element: imagemap. + + +----------------------------------------- + Directives for Substitution Definitions +----------------------------------------- + +The directives in this section may only be used in `substitution +definitions`_. They may not be used directly, in standalone context. +The `image`_ directive may be used both in substitution definitions +and in the standalone context. + +.. _substitution definitions: +.. _substitution definition: restructuredtext.html#substitution-definitions + +.. _replace: + +Replacement Text +================ + +:Directive Type: "replace" +:Doctree Element: Text & `inline elements`_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: A single paragraph; may contain inline markup. + +The "replace" directive is used to indicate replacement text for a +substitution reference. It may be used within `substitution +definitions`_ only. For example, this directive can be used to expand +abbreviations:: + + .. |reST| replace:: reStructuredText + + Yes, |reST| is a long word, so I can't blame anyone for wanting to + abbreviate it. + +As reStructuredText doesn't support nested inline markup, the only way +to create a reference with styled text is to use substitutions with +the "replace" directive:: + + I recommend you try |Python|_. + + .. |Python| replace:: Python, *the* best language around + .. _Python: https://www.python.org/ + + +.. _unicode: + +Unicode Character Codes +======================= + +:Directive Type: "unicode" +:Doctree Element: Text +:Directive Arguments: One or more, required (Unicode character codes, + optional text, and comments). +:Directive Options: Possible (see below). +:Directive Content: None. + +The "unicode" directive converts Unicode character codes (numerical +values) to characters, and may be used in `substitution definitions`_ +only. 
+ +The arguments, separated by spaces, can be: + +* **character codes** as + + - decimal numbers or + + - hexadecimal numbers, prefixed by ``0x``, ``x``, ``\x``, ``U+``, + ``u``, or ``\u`` or as XML-style hexadecimal character entities, + e.g. ``&#x1a2b;`` + +* **text**, which is used as-is. + +Text following " .. " is a comment and is ignored. The spaces between +the arguments are ignored and thus do not appear in the output. +Hexadecimal codes are case-insensitive. + +For example, the following text:: + + Copyright |copy| 2003, |BogusMegaCorp (TM)| |---| + all rights reserved. + + .. |copy| unicode:: 0xA9 .. copyright sign + .. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign + .. |---| unicode:: U+02014 .. em dash + :trim: + +results in: + + Copyright |copy| 2003, |BogusMegaCorp (TM)| |---| + all rights reserved. + + .. |copy| unicode:: 0xA9 .. copyright sign + .. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign + .. |---| unicode:: U+02014 .. em dash + :trim: + +The following options are recognized: + +``ltrim`` : flag (empty) + Whitespace to the left of the substitution reference is removed. + +``rtrim`` : flag (empty) + Whitespace to the right of the substitution reference is removed. + +``trim`` : flag (empty) + Equivalent to ``ltrim`` plus ``rtrim``; whitespace on both sides + of the substitution reference is removed. + + +Date +==== + +:Directive Type: "date" +:Doctree Element: Text +:Directive Arguments: One, optional (date format). +:Directive Options: None. +:Directive Content: None. + +The "date" directive generates the current local date and inserts it +into the document as text. This directive may be used in substitution +definitions only. + +The optional directive content is interpreted as the desired date +format, using the same codes as Python's `time.strftime()`__ function. The +default format is "%Y-%m-%d" (ISO 8601 date), but time fields can also +be used. Examples:: + + .. |date| date:: + ..
|time| date:: %H:%M + + Today's date is |date|. + + This document was generated on |date| at |time|. + +__ https://docs.python.org/3/library/time.html#time.strftime + + +--------------- + Miscellaneous +--------------- + +.. _include: + +Including an External Document Fragment +======================================= + +:Directive Type: "include" +:Doctree Elements: Depend on data being included + (literal_block_ with ``code`` or ``literal`` option). +:Directive Arguments: One, required (path to the file to include). +:Directive Options: Possible (see below). +:Directive Content: None. +:Configuration Setting: file_insertion_enabled_ + +.. WARNING:: + + The "include" directive represents a potential security hole. It + can be disabled with the "file_insertion_enabled_" runtime setting. + + .. _file_insertion_enabled: ../../user/config.html#file-insertion-enabled + +The "include" directive reads a text file. The directive argument is +the path to the file to be included, relative to the document containing +the directive. Unless the options ``literal``, ``code``, or ``parser`` +are given, the file is parsed in the current document's context at the +point of the directive. For example:: + + This first example will be parsed at the document level, and can + thus contain any construct, including section headers. + + .. include:: inclusion.txt + + Back in the main document. + + This second example will be parsed in a block quote context. + Therefore it may only contain body elements. It may not + contain section headers. + + .. include:: inclusion.txt + +If an included document fragment contains section structure, the title +adornments must match those of the master document. + +Standard data files intended for inclusion in reStructuredText +documents are distributed with the Docutils source code, located in +the "docutils" package in the ``docutils/parsers/rst/include`` +directory. 
To access these files, use the special syntax for standard +"include" data files, angle brackets around the file name:: + + .. include:: <isonum.txt> + +The current set of standard "include" data files consists of sets of +substitution definitions. See `reStructuredText Standard Definition +Files`__ for details. + +__ definitions.html + +The following options are recognized: + +``start-line`` : integer + Only the content starting from this line will be included. + (As usual in Python, the first line has index 0 and negative values + count from the end.) + +``end-line`` : integer + Only the content up to (but excluding) this line will be included. + +``start-after`` : text to find in the external data file + Only the content after the first occurrence of the specified text + will be included. + +``end-before`` : text to find in the external data file + Only the content before the first occurrence of the specified text + (but after any ``start-after`` text) will be included. + +``parser`` : parser name + Parse the included content with the specified parser. + (New in Docutils 0.17) + +``literal`` : flag (empty) + The entire included text is inserted into the document as a single + literal block. + +``code`` : [string] (formal language) + The argument and the included content are passed to + the code_ directive (useful for program listings). + +``number-lines`` : [integer] (start line number) + Precede every code line with a line number. + The optional argument is the number of the first line (default 1). + Works only with ``code`` or ``literal``. + +``encoding`` : string + The text encoding of the external data file. Defaults to the + document's input_encoding_. + + .. _input_encoding: ../../user/config.html#input-encoding + +``tab-width`` : integer + Number of spaces for hard tab expansion. + A negative value prevents expansion of hard tabs. Defaults to the + tab_width_ configuration setting. + + ..
_tab_width: ../../user/config.html#tab-width + +With ``code`` or ``literal`` the common options class_ and +name_ are recognized as well. + +Combining ``start/end-line`` and ``start-after/end-before`` is possible. The +text markers will be searched in the specified lines (further limiting the +included content). + +.. _raw-directive: + +Raw Data Pass-Through +===================== + +:Directive Type: "raw" +:Doctree Element: raw_ +:Directive Arguments: One or more, required (output format types). +:Directive Options: Possible (see below). +:Directive Content: Stored verbatim, uninterpreted. None (empty) if a + "file" or "url" option given. +:Configuration Setting: raw_enabled_ + +.. WARNING:: + + The "raw" directive represents a potential security hole. It can + be disabled with the "raw_enabled_" or "file_insertion_enabled_" + runtime settings. + + .. _raw_enabled: ../../user/config.html#raw-enabled + +.. Caution:: + + The "raw" directive is a stop-gap measure allowing the author to + bypass reStructuredText's markup. It is a "power-user" feature + that should not be overused or abused. The use of "raw" ties + documents to specific output formats and makes them less portable. + + If you often need to use the "raw" directive or a "raw"-derived + interpreted text role, that is a sign either of overuse/abuse or + that functionality may be missing from reStructuredText. Please + describe your situation in a message to the Docutils-users_ mailing + list. + +.. _Docutils-users: ../../user/mailing-lists.html#docutils-users + +The "raw" directive indicates non-reStructuredText data that is to be +passed untouched to the Writer. The names of the output formats are +given in the directive arguments. The interpretation of the raw data +is up to the Writer. A Writer may ignore any raw output not matching +its format. + +For example, the following input would be passed untouched by an HTML +writer:: + + .. 
raw:: html + + <hr width=50 size=10> + +A LaTeX Writer could insert the following raw content into its +output stream:: + + .. raw:: latex + + \setlength{\parindent}{0pt} + +Raw data can also be read from an external file, specified in a +directive option. In this case, the content block must be empty. For +example:: + + .. raw:: html + :file: inclusion.html + +Inline equivalents of the "raw" directive can be defined via +`custom interpreted text roles`_ derived from the `"raw" role`_. + +The following options are recognized: + +``file`` : string (newlines removed) + The local filesystem path of a raw data file to be included. + +``url`` : string (whitespace removed) + An Internet URL reference to a raw data file to be included. + +``encoding`` : string + The text encoding of the external raw data (file or URL). + Defaults to the document's encoding (if specified). + +and the common option class_. + + +.. _"raw" role: roles.html#raw + + +.. _classes: + +Class +===== + +:Directive Type: "class" +:Doctree Element: pending_ +:Directive Arguments: One or more, required (class names / attribute + values). +:Directive Options: None. +:Directive Content: Optional. If present, it is interpreted as body + elements. + +The "class" directive sets the `"classes"`_ attribute value on its content +or on the first immediately following [#]_ non-comment element [#]_. +The directive argument consists of one or more space-separated class +names. The names are transformed to conform to the regular expression +``[a-z](-?[a-z0-9]+)*`` (see `Identifier Normalization`_ below). + +Examples:: + + .. class:: special + + This is a "special" paragraph. + + .. class:: exceptional remarkable + + An Exceptional Section + ====================== + + This is an ordinary paragraph. + + .. class:: multiple + + First paragraph. + + Second paragraph. + +The text above is parsed and transformed into this doctree fragment:: + + <paragraph classes="special"> + This is a "special" paragraph. 
+ <section classes="exceptional remarkable"> + <title> + An Exceptional Section + <paragraph> + This is an ordinary paragraph. + <paragraph classes="multiple"> + First paragraph. + <paragraph classes="multiple"> + Second paragraph. + + +.. [#] This is also true, if the class directive is "nested" at the end of + an indented text block, for example:: + + .. note:: the class values set in this directive-block do not apply to + the note but the next paragraph. + + .. class:: special + + This is a paragraph with class value "special". + + This allows the "classification" of individual list items (except the + first, as a preceding class directive applies to the list as a whole):: + + * bullet list + + .. class:: classy item + + * second item, with class argument + +.. [#] To set a "classes" attribute value on a block quote, the + "class" directive must be followed by an empty comment:: + + .. class:: highlights + .. + + Block quote text. + + Without the empty comment, the indented text would be interpreted as the + "class" directive's content, and the classes would be applied to each + element (paragraph, in this case) individually, instead of to the block + quote as a whole. + + +Identifier Normalization +~~~~~~~~~~~~~~~~~~~~~~~~ + +Docutils `class names`_ and `identifier keys`_ are normalized to conform +to the regular expression "``[a-z](-?[a-z0-9]+)*``" by converting + +* alphabetic characters to lowercase, +* accented characters to the base character, +* non-alphanumeric characters to hyphens, +* consecutive hyphens into one hyphen + +and stripping + +* leading hyphens and number characters, and +* trailing hyphens. + +For example ``"Rot.Gelb&Grün:+2008"`` becomes ``"rot-gelb-grun-2008"`` and +``"1000_Steps!"`` becomes ``"steps"``. + +.. topic:: Rationale: + + Identifier keys must be valid in all supported output formats. + + For HTML 4.1 + CSS1 compatibility, identifiers should have no + underscores, colons, or periods. Hyphens may be used. 
+ + - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens: + + ID and NAME tokens must begin with a letter ([A-Za-z]) and + may be followed by any number of letters, digits ([0-9]), + hyphens ("-"), underscores ("_"), colons (":"), and periods + ("."). + + -- https://www.w3.org/TR/html401/types.html#type-name + + - The `CSS1 spec`_ defines identifiers based on the "name" token + ("flex" tokenizer notation below; "latin1" and "escape" 8-bit + characters have been replaced with XML entities):: + + unicode \\[0-9a-f]{1,4} + latin1 [&iexcl;-&yuml;] + escape {unicode}|\\[ -~&iexcl;-&yuml;] + nmchar [-A-Za-z0-9]|{latin1}|{escape} + name {nmchar}+ + + The CSS1 rule requires underscores ("_"), colons (":"), and + periods (".") to be escaped [#]_, + therefore `"classes"`_ and `"ids"`_ attributes should not + contain these characters. Combined with HTML4.1 requirements (the + first character must be a letter; no "unicode", "latin1", or + "escape" characters), this results in the regular expression + ``[A-Za-z][-A-Za-z0-9]*``. Docutils adds a normalization by + downcasing and merge of consecutive hyphens. + + .. [#] CSS identifiers may use underscores ("_") directly in + `CSS Level 1`__, `CSS2.1`__, CSS2.2__, and CSS3__. + + __ https://www.w3.org/TR/CSS21/syndata.html#value-def-identifier + __ https://www.w3.org/TR/CSS/#css-level-1 + __ https://www.w3.org/TR/CSS22/syndata.html + __ https://www.w3.org/TR/css-syntax-3/#typedef-ident-token + + .. _HTML 4.01 spec: https://www.w3.org/TR/html401/ + .. _CSS1 spec: https://www.w3.org/TR/REC-CSS1 + +.. _role: + +Custom Interpreted Text Roles +============================= + +:Directive Type: "role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: Two; one required (new `role name`_), one optional + (base role name, in parentheses). +:Directive Options: Possible (depends on base role). +:Directive Content: depends on base role.
+ +The "role" directive dynamically creates a custom `interpreted text +role`_ and registers it with the parser. This means that after +declaring a role like this:: + + .. role:: custom + +the document may use the new "custom" role:: + + An example of using :custom:`interpreted text` + +This will be parsed into the following document tree fragment:: + + <paragraph> + An example of using + <inline classes="custom"> + interpreted text + +The role must be declared in a document before it can be used. + +.. _role name: + +Role names are case insensitive and must conform to the rules of +simple `reference names`_ (but do not share a namespace with +hyperlinks, footnotes, and citations). + +The new role may be based on an existing role, specified as a second +argument in parentheses (whitespace optional):: + + .. role:: custom(emphasis) + + :custom:`text` + +The parsed result is as follows:: + + <paragraph> + <emphasis classes="custom"> + text + +A special case is the `"raw" role`_: derived roles enable +inline `raw data pass-through`_, e.g.:: + + .. role:: raw-role(raw) + :format: html latex + + :raw-role:`raw text` + +If no base role is explicitly specified, a generic custom role is +automatically used. Subsequent interpreted text will produce an +"inline" element with a `"classes"`_ attribute, as in the first example +above. + +With most roles, the ":class:" option can be used to set a "classes" +attribute that is different from the role name. For example:: + + .. role:: custom + :class: special + + :custom:`interpreted text` + +This is the parsed result:: + + <paragraph> + <inline classes="special"> + interpreted text + +.. _role class: + +The following option is recognized by the "role" directive for most +base roles: + +``class`` : text + Set the `"classes"`_ attribute value on the element produced + (``inline``, or element associated with a base class) when the + custom interpreted text role is used. 
If no directive options are + specified, a "class" option with the directive argument (role + name) as the value is implied. See the class_ directive above. + +Specific base roles may support other options and/or directive +content. See the `reStructuredText Interpreted Text Roles`_ document +for details. + +.. _reStructuredText Interpreted Text Roles: roles.html + + +.. _default-role: + +Setting the Default Interpreted Text Role +========================================= + +:Directive Type: "default-role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: One, optional (new default role name). +:Directive Options: None. +:Directive Content: None. + +The "default-role" directive sets the default interpreted text role, +the role that is used for interpreted text without an explicit role. +For example, after setting the default role like this:: + + .. default-role:: subscript + +any subsequent use of implicit-role interpreted text in the document +will use the "subscript" role:: + + An example of a `default` role. + +This will be parsed into the following document tree fragment:: + + <paragraph> + An example of a + <subscript> + default + role. + +A custom role may be used (see the "role_" directive above), but it +must have been declared in a document before it can be set as the +default role. See the `reStructuredText Interpreted Text Roles`_ +document for details of built-in roles. + +The directive may be used without an argument to restore the initial +default interpreted text role, which is application-dependent. The +initial default interpreted text role of the standard reStructuredText +parser is "title-reference". + + +Metadata +======== + +:Directive Type: "meta" +:Doctree Element: meta_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Must contain a flat field list. + +The "meta" directive is used to specify metadata\ [#]_ to be stored +in, e.g., `HTML meta elements`_ or as `ODT file properties`_.
The +LaTeX writer passes it to the ``pdfinfo`` option of the hyperref_ +package. If an output format does not support "invisible" metadata, +content is silently dropped by the writer. + +.. note:: Data from some `bibliographic fields`_ is automatically + extracted and stored as metadata, too. However, Bibliographic + Fields are also displayed in the document's screen rendering or + printout. + + For an "invisible" *document title*, see the `metadata document + title`_ directive below. + +Within the directive block, a flat field list provides the syntax for +metadata. The field name becomes the contents of the "name" attribute +of the META tag, and the field body (interpreted as a single string +without inline markup) becomes the contents of the "content" +attribute. For example:: + + .. meta:: + :description: The reStructuredText plaintext markup language + :keywords: plaintext, markup language + +This would be converted to the following HTML:: + + <meta name="description" + content="The reStructuredText plaintext markup language"> + <meta name="keywords" content="plaintext, markup language"> + +Support for other META attributes ("http-equiv", "scheme", "lang", +"dir") is provided through field arguments, which must be of the form +"attr=value":: + + .. meta:: + :description lang=en: An amusing story + :description lang=fr: Une histoire amusante + +And their HTML equivalents:: + + <meta name="description" lang="en" content="An amusing story"> + <meta name="description" lang="fr" content="Une histoire amusante"> + +Some META tags use an "http-equiv" attribute instead of the "name" +attribute. To specify "http-equiv" META tags, simply omit the name:: + + .. meta:: + :http-equiv=Content-Type: text/html; charset=ISO-8859-1 + +HTML equivalent:: + + <meta http-equiv="Content-Type" + content="text/html; charset=ISO-8859-1"> + +.. [#] "Metadata" is data about data, in this case data about the + document.
Metadata is, e.g., used to describe and classify web + pages in the World Wide Web, in a form that is easy for search + engines to extract and collate. + +.. _HTML meta elements: + https://html.spec.whatwg.org/multipage/semantics.html#the-meta-element +.. _ODT file properties: + https://en.wikipedia.org/wiki/OpenDocument_technical_specification#Metadata +.. _hyperref: https://ctan.org/pkg/hyperref +.. _bibliographic fields: restructuredtext.html#bibliographic-fields + + +Metadata Document Title +======================= + +:Directive Type: "title" +:Doctree Element: Sets the document's `title attribute`_. +:Directive Arguments: One, required (the title text). +:Directive Options: None. +:Directive Content: None. + +The "title" directive specifies the document title as metadata, which +does not become part of the document body. It overrides the +document-supplied `document title`_ and the `"title" configuration +setting`_. For example, in HTML output the metadata document title +appears in the title bar of the browser window. + +.. _document title: restructuredtext.html#document-title +.. _"title" configuration setting: ../../user/config.html#title + +Restructuredtext-Test-Directive +=============================== + +:Directive Type: "restructuredtext-test-directive" +:Doctree Element: system_warning +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as a literal block. + +This directive is provided for test purposes only. (Nobody is +expected to type in a name *that* long!) It is converted into a +level-1 (info) system message showing the directive data, possibly +followed by a literal block containing the rest of the directive +block. + +-------------- +Common Options +-------------- + +Most of the directives that generate doctree elements support the following +options: + +.. _class-option: +.. 
_class: + +``class`` : text (space separated list of `class names`_) + Set a `"classes"`_ attribute value on the doctree element generated by + the directive. See also the class_ directive. + + .. _name: + +``name`` : text + Add `text` to the `"names"`_ attribute of the doctree element generated + by the directive. This allows `hyperlink references`_ to the element + using `text` as `reference name`_. + + Specifying the `name` option of a directive, e.g., :: + + .. image:: bild.png + :name: my picture + + is a concise syntax alternative to preceding it with a `hyperlink + target`_ :: + + .. _my picture: + + .. image:: bild.png + + +.. _reference name: +.. _reference names: restructuredtext.html#reference-names +.. _hyperlink target: restructuredtext.html#hyperlink-targets +.. _hyperlink references: restructuredtext.html#hyperlink-references +.. _class names: ../doctree.html#classnames-type +.. _"classes": ../doctree.html#classes +.. _identifier keys: ../doctree.html#ids-type +.. _"ids": ../doctree.html#ids +.. _"names": ../doctree.html#names +.. _admonition: ../doctree.html#admonition +.. _block_quote: ../doctree.html#block-quote +.. _caption: ../doctree.html#caption +.. _compound: ../doctree.html#compound +.. _container element: ../doctree.html#container +.. _decoration: ../doctree.html#decoration +.. _figure: ../doctree.html#figure +.. _footnote: ../doctree.html#footnote +.. _footnote_reference: ../doctree.html#footnote-reference +.. _generated: ../doctree.html#generated +.. _image: ../doctree.html#image +.. _inline elements: ../doctree.html#inline-elements +.. _interpreted text role: roles.html +.. _literal_block: ../doctree.html#literal-block +.. _legend: ../doctree.html#legend +.. _length: restructuredtext.html#length-units +.. _line_block: ../doctree.html#line-block +.. _math_block: ../doctree.html#math-block +.. _meta: ../doctree.html#meta +.. _pending: ../doctree.html#pending +.. _percentage: restructuredtext.html#percentage-units +.. 
_raw: ../doctree.html#raw +.. _rubric: ../doctree.html#rubric +.. _sidebar: ../doctree.html#sidebar +.. _table: ../doctree.html#table +.. _title: ../doctree.html#title +.. _title attribute: ../doctree.html#title-attribute +.. _topic: ../doctree.html#topic + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/introduction.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/introduction.txt new file mode 100644 index 00000000..8c34a64b --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/introduction.txt @@ -0,0 +1,311 @@ +===================================== + An Introduction to reStructuredText +===================================== +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +reStructuredText_ is an easy-to-read, what-you-see-is-what-you-get +plaintext markup syntax and parser system. It is useful for inline +program documentation (such as Python docstrings), for quickly +creating simple web pages, and for standalone documents. +reStructuredText_ is a proposed revision and reinterpretation of the +StructuredText_ and Setext_ lightweight markup systems. + +reStructuredText is designed for extensibility for specific +application domains. Its parser is a component of Docutils_. + +This document defines the goals_ of reStructuredText and provides a +history_ of the project. It is written using the reStructuredText +markup, and therefore serves as an example of its use. For a gentle +introduction to using reStructuredText, please read `A +ReStructuredText Primer`_. The `Quick reStructuredText`_ user +reference is also useful. The `reStructuredText Markup +Specification`_ is the definitive reference. 
There is also an +analysis of the `Problems With StructuredText`_. + +ReStructuredText's web page is +https://docutils.sourceforge.io/rst.html. + +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _StructuredText: https://zopestructuredtext.readthedocs.org/ +.. _Setext: https://docutils.sourceforge.io/mirror/setext.html +.. _Docutils: https://docutils.sourceforge.io/ +.. _A ReStructuredText Primer: ../../user/rst/quickstart.html +.. _Quick reStructuredText: ../../user/rst/quickref.html +.. _reStructuredText Markup Specification: restructuredtext.html +.. _Problems with StructuredText: ../../dev/rst/problems.html + + +Goals +===== + +The primary goal of reStructuredText_ is to define a markup syntax for +use in Python docstrings and other documentation domains, that is +readable and simple, yet powerful enough for non-trivial use. The +intended purpose of the reStructuredText markup is twofold: + +- the establishment of a set of standard conventions allowing the + expression of structure within plaintext, and + +- the conversion of such documents into useful structured data + formats. + +The secondary goal of reStructuredText is to be accepted by the Python +community (by way of being blessed by PythonLabs and the BDFL [#]_) as +a standard for Python inline documentation (possibly one of several +standards, to account for taste). + +.. [#] Python's creator and "Benevolent Dictator For Life", + Guido van Rossum. + +To clarify the primary goal, here are specific design goals, in order, +beginning with the most important: + +1. Readable. The marked-up text must be easy to read without any + prior knowledge of the markup language. It should be as easily + read in raw form as in processed form. + +2. Unobtrusive. The markup that is used should be as simple and + unobtrusive as possible. The simplicity of markup constructs + should be roughly proportional to their frequency of use. 
The most + common constructs, with natural and obvious markup, should be the + simplest and most unobtrusive. Less common constructs, for which + there is no natural or obvious markup, should be distinctive. + +3. Unambiguous. The rules for markup must not be open for + interpretation. For any given input, there should be one and only + one possible output (including error output). + +4. Unsurprising. Markup constructs should not cause unexpected output + upon processing. As a fallback, there must be a way to prevent + unwanted markup processing when a markup construct is used in a + non-markup context (for example, when documenting the markup syntax + itself). + +5. Intuitive. Markup should be as obvious and easily remembered as + possible, for the author as well as for the reader. Constructs + should take their cues from such naturally occurring sources as + plaintext email messages, newsgroup postings, and text + documentation such as README.txt files. + +6. Easy. It should be easy to mark up text using any ordinary text + editor. + +7. Scalable. The markup should be applicable regardless of the length + of the text. + +8. Powerful. The markup should provide enough constructs to produce a + reasonably rich structured document. + +9. Language-neutral. The markup should apply to multiple natural (as + well as artificial) languages, not only English. + +10. Extensible. The markup should provide a simple syntax and + interface for adding more complex general markup, and custom + markup. + +11. Output-format-neutral. The markup will be appropriate for + processing to multiple output formats, and will not be biased + toward any particular format. + +The design goals above were used as criteria for accepting or +rejecting syntax, or selecting between alternatives. + +It is emphatically *not* the goal of reStructuredText to define +docstring semantics, such as docstring contents or docstring length. 
+These issues are orthogonal to the markup syntax and beyond the scope +of this specification. + +Also, it is not the goal of reStructuredText to maintain compatibility +with StructuredText_ or Setext_. reStructuredText shamelessly steals +their great ideas and ignores the not-so-great. + +Author's note: + + Due to the nature of the problem we're trying to solve (or, + perhaps, due to the nature of the proposed solution), the above + goals unavoidably conflict. I have tried to extract and distill + the wisdom accumulated over the years in the Python Doc-SIG_ + mailing list and elsewhere, to come up with a coherent and + consistent set of syntax rules, and the above goals by which to + measure them. + + There will inevitably be people who disagree with my particular + choices. Some desire finer control over their markup, others + prefer less. Some are concerned with very short docstrings, + others with full-length documents. This specification is an + effort to provide a reasonably rich set of markup constructs in a + reasonably simple form, that should satisfy a reasonably large + group of reasonable people. + + David Goodger (goodger@python.org), 2001-04-20 + +.. _Doc-SIG: https://www.python.org/sigs/doc-sig/ + + +History +======= + +reStructuredText_, the specification, is based on StructuredText_ and +Setext_. StructuredText was developed by Jim Fulton of `Zope +Corporation`_ (formerly Digital Creations) and first released in 1996. +It is now released as a part of the open-source "Z Object Publishing +Environment" (ZOPE_). Ian Feldman's and Tony Sanders' earlier Setext_ +specification was either an influence on StructuredText or, by their +similarities, at least evidence of the correctness of this approach. + +I discovered StructuredText_ in late 1999 while searching for a way to +document the Python modules in one of my projects. Version 1.1 of +StructuredText was included in Daniel Larsson's pythondoc_. 
Although +I was not able to get pythondoc to work for me, I found StructuredText +to be almost ideal for my needs. I joined the Python Doc-SIG_ +(Documentation Special Interest Group) mailing list and found an +ongoing discussion of the shortcomings of the StructuredText +"standard". This discussion has been going on since the inception of +the mailing list in 1996, and possibly predates it. + +I decided to modify the original module with my own extensions and +some suggested by the Doc-SIG members. I soon realized that the +module was not written with extension in mind, so I embarked upon a +general reworking, including adapting it to the "re" regular +expression module (the original inspiration for the name of this +project). Soon after I completed the modifications, I discovered that +StructuredText.py was up to version 1.23 in the ZOPE distribution. +Implementing the new syntax extensions from version 1.23 proved to be +an exercise in frustration, as the complexity of the module had become +overwhelming. + +In 2000, development on StructuredTextNG ("Next Generation") began at +`Zope Corporation`_ (then Digital Creations). It seems to have many +improvements, but still suffers from many of the problems of classic +StructuredText. + +I decided that a complete rewrite was in order, and even started a +`reStructuredText SourceForge project`_ (now inactive). My +motivations (the "itches" I aim to "scratch") are as follows: + +- I need a standard format for inline documentation of the programs I + write. This inline documentation has to be convertible to other + useful formats, such as HTML. I believe many others have the same + need. + +- I believe in the Setext/StructuredText idea and want to help + formalize the standard. However, I feel the current specifications + and implementations have flaws that desperately need fixing. 
+ +- reStructuredText could form part of the foundation for a + documentation extraction and processing system, greatly benefitting + Python. But it is only a part, not the whole. reStructuredText is + a markup language specification and a reference parser + implementation, but it does not aspire to be the entire system. I + don't want reStructuredText or a hypothetical Python documentation + processor to die stillborn because of over-ambition. + +- Most of all, I want to help ease the documentation chore, the bane + of many a programmer. + +Unfortunately I was sidetracked and stopped working on this project. +In November 2000 I made the time to enumerate the problems of +StructuredText and possible solutions, and complete the first draft of +a specification. This first draft was posted to the Doc-SIG in three +parts: + +- `A Plan for Structured Text`__ +- `Problems With StructuredText`__ +- `reStructuredText: Revised Structured Text Specification`__ + +__ https://mail.python.org/pipermail/doc-sig/2000-November/001239.html +__ https://mail.python.org/pipermail/doc-sig/2000-November/001240.html +__ https://mail.python.org/pipermail/doc-sig/2000-November/001241.html + +In March 2001 a flurry of activity on the Doc-SIG spurred me to +further revise and refine my specification, the result of which you +are now reading. An offshoot of the reStructuredText project has been +the realization that a single markup scheme, no matter how well +thought out, may not be enough. In order to tame the endless debates +on Doc-SIG, a flexible `Docstring Processing System framework`_ needed +to be constructed. This framework has become the more important of +the two projects; reStructuredText_ has found its place as one +possible choice for a single component of the larger framework. 
+ +The project web site and the first project release were rolled out in +June 2001, including posting the second draft of the spec [#spec-2]_ +and the first draft of PEPs 256, 257, and 258 [#peps-1]_ to the +Doc-SIG. These documents and the project implementation proceeded to +evolve at a rapid pace. Implementation history details can be found +in the `project history file`_. + +In November 2001, the reStructuredText parser was nearing completion. +Development of the parser continued with the addition of small +convenience features, improvements to the syntax, the filling in of +gaps, and bug fixes. After a long holiday break, in early 2002 most +development moved over to the other Docutils components, the +"Readers", "Writers", and "Transforms". A "standalone" reader +(processes standalone text file documents) was completed in February, +and a basic HTML writer (producing HTML 4.01, using CSS-1) was +completed in early March. + +`PEP 287`_, "reStructuredText Standard Docstring Format", was created +to formally propose reStructuredText as a standard format for Python +docstrings, PEPs, and other files. It was first posted to +comp.lang.python_ and the Python-dev_ mailing list on 2002-04-02. + +Version 0.4 of the reStructuredText__ and `Docstring Processing +System`_ projects were released in April 2002. The two projects were +immediately merged, renamed to "Docutils_", and a 0.1 release soon +followed. + +.. __: `reStructuredText SourceForge project`_ + +.. 
[#spec-2] The second draft of the spec: + + - `An Introduction to reStructuredText`__ + - `Problems With StructuredText`__ + - `reStructuredText Markup Specification`__ + - `Python Extensions to the reStructuredText Markup + Specification`__ + + __ https://mail.python.org/pipermail/doc-sig/2001-June/001858.html + __ https://mail.python.org/pipermail/doc-sig/2001-June/001859.html + __ https://mail.python.org/pipermail/doc-sig/2001-June/001860.html + __ https://mail.python.org/pipermail/doc-sig/2001-June/001861.html + +.. [#peps-1] First drafts of the PEPs: + + - `PEP 256: Docstring Processing System Framework`__ + - `PEP 258: DPS Generic Implementation Details`__ + - `PEP 257: Docstring Conventions`__ + + Current working versions of the PEPs can be found in + https://docutils.sourceforge.io/docs/peps/, and official versions + can be found in the `master PEP repository`_. + + __ https://mail.python.org/pipermail/doc-sig/2001-June/001855.html + __ https://mail.python.org/pipermail/doc-sig/2001-June/001856.html + __ https://mail.python.org/pipermail/doc-sig/2001-June/001857.html + + +.. _Zope Corporation: http://www.zope.com +.. _ZOPE: https://www.zope.dev +.. _reStructuredText SourceForge project: + http://structuredtext.sourceforge.net/ +.. _pythondoc: http://starship.python.net/crew/danilo/pythondoc/ +.. _project history file: ../../../HISTORY.html +.. _PEP 287: ../../peps/pep-0287.html +.. _Docstring Processing System framework: ../../peps/pep-0256.html +.. _comp.lang.python: news:comp.lang.python +.. _Python-dev: https://mail.python.org/pipermail/python-dev/ +.. _Docstring Processing System: http://docstring.sourceforge.net/ +.. _master PEP repository: https://peps.python.org/ + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/mathematics.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/mathematics.txt new file mode 100644 index 00000000..ecd06377 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/mathematics.txt @@ -0,0 +1,1098 @@ +============================ +LaTeX syntax for mathematics +============================ + +.. role:: m(math) +.. default-role:: math +.. |latex| replace:: L\ :sup:`A`\ T\ :sub:`E`\ X + +:abstract: Docutils supports mathematical content with a `"math" + directive`__ and `role`__. The input format is *LaTeX math + syntax*\ [#math-syntax]_ with support for Unicode symbols. + +.. sectnum:: +.. contents:: + +__ https://docutils.sourceforge.io/docs/ref/rst/directives.html#math +__ https://docutils.sourceforge.io/docs/ref/rst/roles.html#math + +Inline formulas and displayed equations +======================================= + +The **math role** can be used for inline mathematical expressions: +``:math:`\psi(r) = \exp(-2r)``` will produce :m:`\psi(r)=\exp(-2r)`. +Inside the backticks you can write anything you would write between dollar +signs in a LaTeX document. [#math-syntax]_ + +.. tip:: + + If you put ``.. default-role:: math`` at the top of your + document, you can write ```x^2``` instead of the longer + version: ``:math:`x^2```. You can also introduce an + abbreviation like this ``.. role:: m(math)``. That will allow + you to write ``:m:`x^2``` or ```x^2`:m:``. + + +The **math directive** is used for displayed equations. It corresponds to +an ``equation*`` or ``align*`` environment in a LaTeX document. If you +write:: + + .. math:: \psi(r) = e^{-2r} + +you will get: + +.. math:: \psi(r) = e^{-2r} + +A more complex example is the definition of the `Fourier transform`_:: + + ..
math:: + :name: Fourier transform + + (\mathcal{F}f)(y) + = \frac{1}{\sqrt{2\pi}^{\ n}} + \int_{\mathbb{R}^n} f(x)\, + e^{-\mathrm{i} y \cdot x} \,\mathrm{d} x. + +which is rendered as: + +.. math:: + :name: Fourier transform + + (\mathcal{F}f)(y) + = \frac{1}{\sqrt{2\pi}^{\ n}} + \int_{\mathbb{R}^n} f(x)\, + e^{-\mathrm{i} y \cdot x} \,\mathrm{d} x. + +The ``:name:`` option puts a label on the equation that can be +linked to by `hyperlink references`_. + +Displayed equations can use ``\\`` and ``&`` for line shifts and alignments:: + + .. math:: + + a &= (x + y)^2 & b &= (x - y)^2 \\ + &= x^2 + 2xy + y^2 & &= x^2 - 2xy + y^2 + +LaTeX output will wrap it in an ``align*`` environment. +The result is: + +.. math:: + + a &= (x + y)^2 & b &= (x - y)^2 \\ + &= x^2 + 2xy + y^2 & &= x^2 - 2xy + y^2 + + +.. [#math-syntax] The supported LaTeX commands include AMS extensions + (see, e.g., the `Short Math Guide`_). Some of the shown symbols + require the "amssymb" `LaTeX package`_ (or another package providing + the AMS symbol macros) when exported with the "latex" writer. + + The support is limited to a subset of *LaTeX math* by the conversion + required for many output formats. For HTML, the `math_output`_ + configuration setting (or the corresponding ``--math-output`` command + line option) selects between alternative output formats with different + subsets of supported elements. If a writer does not support math + typesetting, the content is inserted verbatim. + +.. _hyperlink references: + ../ref/rst/restructuredtext.html#hyperlink-references +.. _Short Math Guide: + https://mirrors.ctan.org/info/short-math-guide/short-math-guide.pdf +.. _math_output: + https://docutils.sourceforge.io/docs/user/config.html#math-output +.. 
_LaTeX package: + ../../user/latex.html#latex-document-classes-and-packages + + +Mathematical symbols +==================== + +The following tables are adapted from the first edition of +"The LaTeX Companion" (Goossens, Mittelbach, Samarin) and the +AMS `Short Math Guide`_. + + +Accents and embellishments +-------------------------- + +The "narrow" accents are intended for a single-letter base. + +.. class:: colwidths-auto + + =========== ============= =========== ============= ============== ================ + `\acute{x}` ``\acute{x}`` `\dot{t}` ``\dot{t}`` `\hat{x}` ``\hat{x}`` + `\bar{v}` ``\bar{v}`` `\ddot{t}` ``\ddot{t}`` `\mathring{x}` ``\mathring{x}`` + `\breve{x}` ``\breve{x}`` `\dddot{t}` ``\dddot{t}`` `\tilde{n}` ``\tilde{n}`` + `\check{x}` ``\check{x}`` `\grave{x}` ``\grave{x}`` `\vec{x}` ``\vec{x}`` + =========== ============= =========== ============= ============== ================ + +When adding an accent to an i or j in math, dotless variants can be +obtained with ``\imath`` and ``\jmath``: `\hat \imath`, `\vec{\jmath}`. + +For embellishments that span multiple symbols, use: + +.. class:: colwidths-auto + + ========================== ============================ =========================== ============================= + `\widetilde{gbi}` ``\widetilde{gbi}`` `\widehat{gbi}` ``\widehat{gbi}`` + `\overline{gbi}` ``\overline{gbi}`` `\underline{gbi}` ``\underline{gbi}`` + `\overbrace{gbi}` ``\overbrace{gbi}`` `\underbrace{gbi}` ``\underbrace{gbi}`` + `\overleftarrow{gbi}` ``\overleftarrow{gbi}`` `\underleftarrow{gbi}` ``\underleftarrow{gbi}`` + `\overrightarrow{gbi}` ``\overrightarrow{gbi}`` `\underrightarrow{gbi}` ``\underrightarrow{gbi}`` + `\overleftrightarrow{gbi}` ``\overleftrightarrow{gbi}`` `\underleftrightarrow{gbi}` ``\underleftrightarrow{gbi}`` + ========================== ============================ =========================== ============================= + + +Binary operators +---------------- +.. 
class:: colwidths-auto + + ================== ==================== ================= =================== ================== ==================== + `*` ``*`` `\circledast` ``\circledast`` `\ominus` ``\ominus`` + `+` ``+`` `\circledcirc` ``\circledcirc`` `\oplus` ``\oplus`` + `-` ``-`` `\circleddash` ``\circleddash`` `\oslash` ``\oslash`` + `:` ``:`` `\cup` ``\cup`` `\otimes` ``\otimes`` + `\Cap` ``\Cap`` `\curlyvee` ``\curlyvee`` `\pm` ``\pm`` + `\Cup` ``\Cup`` `\curlywedge` ``\curlywedge`` `\rightthreetimes` ``\rightthreetimes`` + `\amalg` ``\amalg`` `\dagger` ``\dagger`` `\rtimes` ``\rtimes`` + `\ast` ``\ast`` `\ddagger` ``\ddagger`` `\setminus` ``\setminus`` + `\bigcirc` ``\bigcirc`` `\diamond` ``\diamond`` `\smallsetminus` ``\smallsetminus`` + `\bigtriangledown` ``\bigtriangledown`` `\div` ``\div`` `\sqcap` ``\sqcap`` + `\bigtriangleup` ``\bigtriangleup`` `\divideontimes` ``\divideontimes`` `\sqcup` ``\sqcup`` + `\boxdot` ``\boxdot`` `\dotplus` ``\dotplus`` `\star` ``\star`` + `\boxminus` ``\boxminus`` `\doublebarwedge` ``\doublebarwedge`` `\times` ``\times`` + `\boxplus` ``\boxplus`` `\gtrdot` ``\gtrdot`` `\triangleleft` ``\triangleleft`` + `\boxtimes` ``\boxtimes`` `\intercal` ``\intercal`` `\triangleright` ``\triangleright`` + `\bullet` ``\bullet`` `\leftthreetimes` ``\leftthreetimes`` `\uplus` ``\uplus`` + `\cap` ``\cap`` `\lessdot` ``\lessdot`` `\vee` ``\vee`` + `\cdot` ``\cdot`` `\ltimes` ``\ltimes`` `\veebar` ``\veebar`` + `\centerdot` ``\centerdot`` `\mp` ``\mp`` `\wedge` ``\wedge`` + `\circ` ``\circ`` `\odot` ``\odot`` `\wr` ``\wr`` + ================== ==================== ================= =================== ================== ==================== + + +Extensible delimiters +--------------------- +Unless you indicate otherwise, delimiters in math formulas remain at the +standard size regardless of the height of the enclosed material. 
To get +adaptable sizes, use ``\left`` and ``\right`` prefixes, for example +`g(A,B,Y) = f \left(A,B,X=h^{[X]}(Y)\right)` or + +.. math:: a_n = \left(\frac{1}{2}\right)^n + +Use ``.`` for "empty" delimiters: + +.. math:: A = \left . \frac{1}{1-n}\, \right |_{n=0}^\infty + +See also the commands for fixed `delimiter sizes`_ below. + +The following symbols extend when used with ``\left`` and ``\right``: + +Pairing delimiters +~~~~~~~~~~~~~~~~~~ +.. class:: colwidths-auto + + =============== ================= ========================= =========================== + `( )` ``( )`` `\langle \rangle` ``\langle \rangle`` + `[ ]` ``[ ]`` `\lceil \rceil` ``\lceil \rceil`` + `\{ \}` ``\{ \}`` `\lfloor \rfloor` ``\lfloor \rfloor`` + `\lvert \rvert` ``\lvert \rvert`` `\lgroup \rgroup` ``\lgroup \rgroup`` + `\lVert \rVert` ``\lVert \rVert`` `\lmoustache \rmoustache` ``\lmoustache \rmoustache`` + =============== ================= ========================= =========================== + + +Nonpairing delimiters +~~~~~~~~~~~~~~~~~~~~~ +.. class:: colwidths-auto + + ==== ====== ============ ============== ============ ============== + `|` ``|`` `\vert` ``\vert`` `\arrowvert` ``\arrowvert`` + `\|` ``\|`` `\Vert` ``\Vert`` `\Arrowvert` ``\Arrowvert`` + `/` ``/`` `\backslash` ``\backslash`` `\bracevert` ``\bracevert`` + ==== ====== ============ ============== ============ ============== + +The use of ``|`` and ``\|`` for pairs of vertical bars may produce +incorrect spacing, e.g., ``|k|=|-k|`` produces `|k| = |−k|` and +``|\sin(x)|`` produces `|\sin(x)|`. The pairing delimiters, e.g. +`\lvert -k\rvert` and `\lvert\sin(x)\rvert`, prevent this problem +(in LaTeX and MathJax). + +.. TODO: fix spacing before unary minus (see also cases example below). + +Extensible vertical arrows +-------------------------- +.. 
class:: colwidths-auto + + =============================== ====================================== + `\uparrow` ``\uparrow`` `\Uparrow` ``\Uparrow`` + `\downarrow` ``\downarrow`` `\Downarrow` ``\Downarrow`` + `\updownarrow` ``\updownarrow`` `\Updownarrow` ``\Updownarrow`` + =============================== ====================================== + + +Functions (named operators) +--------------------------- +.. class:: colwidths-auto + + ========= =========== ========= =========== ============= ================ + `\arccos` ``\arccos`` `\gcd` ``\gcd`` `\Pr` ``\Pr`` + `\arcsin` ``\arcsin`` `\hom` ``\hom`` `\projlim` ``\projlim`` + `\arctan` ``\arctan`` `\inf` ``\inf`` `\sec` ``\sec`` + `\arg` ``\arg`` `\injlim` ``\injlim`` `\sin` ``\sin`` + `\cos` ``\cos`` `\ker` ``\ker`` `\sinh` ``\sinh`` + `\cosh` ``\cosh`` `\lg` ``\lg`` `\sup` ``\sup`` + `\cot` ``\cot`` `\lim` ``\lim`` `\tan` ``\tan`` + `\coth` ``\coth`` `\liminf` ``\liminf`` `\tanh` ``\tanh`` + `\csc` ``\csc`` `\limsup` ``\limsup`` `\varlimsup` ``\varlimsup`` + `\deg` ``\deg`` `\ln` ``\ln`` `\varliminf` ``\varliminf`` + `\det` ``\det`` `\log` ``\log`` `\varprojlim` ``\varprojlim`` + `\dim` ``\dim`` `\max` ``\max`` `\varinjlim` ``\varinjlim`` + `\exp` ``\exp`` `\min` ``\min`` + ========= =========== ========= =========== ============= ================ + +Named operators outside the above list can be typeset with +``\operatorname{name}``, e.g. + +.. math:: \operatorname{sgn}(-3) = -1. + +.. TODO: \operatorname* for function name with limits. + +The ``\DeclareMathOperator`` command can only be used in the +`LaTeX preamble`_. + +.. _LaTeX preamble: latex.html#latex-preamble + + +Greek letters +------------- + +Greek letters that have Latin look-alikes are rarely used in math +formulas and not supported by LaTeX. + +.. 
class:: colwidths-auto + + ========== ============ ========== ============ ========== ============ ============== =============== + `\Gamma` ``\Gamma`` `\alpha` ``\alpha`` `\mu` ``\mu`` `\omega` ``\omega`` + `\Delta` ``\Delta`` `\beta` ``\beta`` `\nu` ``\nu`` `\digamma` ``\digamma`` + `\Lambda` ``\Lambda`` `\gamma` ``\gamma`` `\xi` ``\xi`` `\varepsilon` ``\varepsilon`` + `\Phi` ``\Phi`` `\delta` ``\delta`` `\pi` ``\pi`` `\varkappa` ``\varkappa`` + `\Pi` ``\Pi`` `\epsilon` ``\epsilon`` `\rho` ``\rho`` `\varphi` ``\varphi`` + `\Psi` ``\Psi`` `\zeta` ``\zeta`` `\sigma` ``\sigma`` `\varpi` ``\varpi`` + `\Sigma` ``\Sigma`` `\eta` ``\eta`` `\tau` ``\tau`` `\varrho` ``\varrho`` + `\Theta` ``\Theta`` `\theta` ``\theta`` `\upsilon` ``\upsilon`` `\varsigma` ``\varsigma`` + `\Upsilon` ``\Upsilon`` `\iota` ``\iota`` `\phi` ``\phi`` `\vartheta` ``\vartheta`` + `\Xi` ``\Xi`` `\kappa` ``\kappa`` `\chi` ``\chi`` + `\Omega` ``\Omega`` `\lambda` ``\lambda`` `\psi` ``\psi`` + ========== ============ ========== ============ ========== ============ ============== =============== + +In LaTeX, the default font for capital Greek letters is upright/roman. +*Italic* capital Greek letters can be obtained by loading a `package +providing the "ISO" math style`__. They are used by default in MathML. + +Individual Greek italic capitals can also be achieved preceding the +letter name with ``var`` like ``\varPhi``: +`\varGamma\ \varDelta\ \varLambda\ \varPhi\ \varPi\ \varPsi\ \varSigma\ +\varTheta\ \varUpsilon\ \varXi\ \varOmega` + + +__ https://mirrors.ctan.org/macros/latex/contrib/isomath/isomath.html#table-2 + + +Letterlike symbols +------------------ +.. 
class:: colwidths-auto + + ============= =============== ========== ============ ========== ============ =========== ============= + `\forall` ``\forall`` `\aleph` ``\aleph`` `\hbar` ``\hbar`` `\ell` ``\ell`` + `\complement` ``\complement`` `\beth` ``\beth`` `\hslash` ``\hslash`` `\wp` ``\wp`` + `\exists` ``\exists`` `\gimel` ``\gimel`` `\Im` ``\Im`` `\Re` ``\Re`` + `\Finv` ``\Finv`` `\daleth` ``\daleth`` `\imath` ``\imath`` `\circledR` ``\circledR`` + `\Game` ``\Game`` `\partial` ``\partial`` `\jmath` ``\jmath`` `\circledS` ``\circledS`` + `\mho` ``\mho`` `\eth` ``\eth`` `\Bbbk` ``\Bbbk`` + ============= =============== ========== ============ ========== ============ =========== ============= + +Mathematical Alphabets +---------------------- + +Mathematical alphabets select a combination of font attributes (shape, +weight, family) [#]_. They are intended for mathematical variables where +style variations are important semantically. + +.. class:: colwidths-auto + + =============== ============================ ========================== + command example result + =============== ============================ ========================== + ``\mathbf`` ``\mathbf{r}^2=x^2+y^2+z^2`` `\mathbf{r}^2=x^2+y^2+z^2` + ``\mathbb`` ``\mathbb{R \subset C}`` `\mathbb{R \subset C}` + ``\mathcal`` ``\mathcal{F}f(x)`` `\mathcal{F}f(x)` + ``\mathfrak`` ``\mathfrak{a}`` `\mathfrak{a}` + ``\mathit`` ``\mathit{\Gamma}`` `\mathit{\Gamma}` + ``\mathrm`` ``s_\mathrm{out}`` `s_\mathrm{out}` + ``\mathsf`` ``\mathsf x`` `\mathsf x` + ``\mathtt`` ``\mathtt{0.12}`` `\mathtt{0.12}` + =============== ============================ ========================== + +.. [#] TeX’s *math alphabets* correspond to the `mathematical + alphanumeric symbols`__ block in Unicode and the "mathvariant" `style + attribute`__ in MathML. 
+ + __ https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols + __ https://developer.mozilla.org/en-US/docs/Web/MathML/Attribute + +Additional alphabets are defined in LaTeX packages, e.g. + +.. class:: colwidths-auto + + =========== ============= ====================== + TeX command LaTeX package MathML "mathvariant" + =========== ============= ====================== + mathbfit isomath_ bold-italic + mathsfit isomath_ sans-serif-italic + mathsfbfit isomath_ sans-serif-bold-italic + mathscr mathrsfs_ script + =========== ============= ====================== +.. _isomath: https://www.ctan.org/pkg/isomath +.. _mathrsfs: https://www.ctan.org/pkg/mathrsfs + +This can be used to typeset vector symbols in bold italic +in line with the International Standard [ISO-80000-2]. + +.. ``\mathbfit{r}^2=x^2+y^2+z^2`` becomes + + .. math:: \mathbfit{r}^2=x^2+y^2+z^2. + +The package mathrsfs_ (and some drop-in replacements) define the ``\mathscr`` +macro that selects a differently shaped "script" alphabet. +Compare `\mathscr{A, B, …, Z, a, b, …, z}` +with `\mathcal{A, B, …, Z, a, b, …, z}`. + + +In contrast to the math alphabet selectors, ``\boldsymbol`` only changes +the *font weight*. In LaTeX, it can be used to get a bold version of any +mathematical symbol (for other output formats, results are mixed): + +.. math:: + \boldsymbol{\cos(x)\pm\alpha \approx 3\Gamma \quad \forall x\in\mathbb{R}} + + +Miscellaneous symbols +--------------------- +.. 
class:: colwidths-auto + +==================== ====================== ================ ================== ================= =================== +`\#` ``\#`` `\clubsuit` ``\clubsuit`` `\neg` ``\neg`` +`\&` ``\&`` `\diamondsuit` ``\diamondsuit`` `\nexists` ``\nexists`` +`\angle` ``\angle`` `\emptyset` ``\emptyset`` `\prime` ``\prime`` +`\backprime` ``\backprime`` `\exists` ``\exists`` `\sharp` ``\sharp`` +`\bigstar` ``\bigstar`` `\flat` ``\flat`` `\spadesuit` ``\spadesuit`` +`\blacklozenge` ``\blacklozenge`` `\forall` ``\forall`` `\sphericalangle` ``\sphericalangle`` +`\blacksquare` ``\blacksquare`` `\heartsuit` ``\heartsuit`` `\square` ``\square`` +`\blacktriangle` ``\blacktriangle`` `\infty` ``\infty`` `\surd` ``\surd`` +`\blacktriangledown` ``\blacktriangledown`` `\lozenge` ``\lozenge`` `\top` ``\top`` +`\bot` ``\bot`` `\measuredangle` ``\measuredangle`` `\triangle` ``\triangle`` +`\diagdown` ``\diagdown`` `\nabla` ``\nabla`` `\triangledown` ``\triangledown`` +`\diagup` ``\diagup`` `\natural` ``\natural`` `\varnothing` ``\varnothing`` +==================== ====================== ================ ================== ================= =================== + + +Punctuation +----------- +.. class:: colwidths-auto + +=== ===== ======== =============== ======== ========== +`.` ``.`` `!` ``!`` `\vdots` ``\vdots`` +`/` ``/`` `?` ``?`` `\dotsb` ``\dotsb`` +`|` ``|`` `\colon` ``\colon`` [#]_ `\dotsc` ``\dotsc`` +`'` ``'`` `\cdots` ``\cdots`` `\dotsi` ``\dotsi`` +`;` ``;`` `\ddots` ``\ddots`` `\dotsm` ``\dotsm`` +`:` ``:`` `\ldots` ``\ldots`` `\dotso` ``\dotso`` +=== ===== ======== =============== ======== ========== + +.. [#] Punctuation (not ratio): + Compare spacing in `a\colon b\to c` to `a:b = c`. + +Relation symbols +---------------- + +Arrows +~~~~~~ +.. 
class:: colwidths-auto + + ====================== ======================== ===================== ======================= + `\circlearrowleft` ``\circlearrowleft`` `\circlearrowright` ``\circlearrowright`` + `\curvearrowleft` ``\curvearrowleft`` `\curvearrowright` ``\curvearrowright`` + `\hookleftarrow` ``\hookleftarrow`` `\hookrightarrow` ``\hookrightarrow`` + `\leftarrow` ``\leftarrow`` `\rightarrow` ``\rightarrow`` + `\Leftarrow` ``\Leftarrow`` `\Rightarrow` ``\Rightarrow`` + `\leftarrowtail` ``\leftarrowtail`` `\rightarrowtail` ``\rightarrowtail`` + `\leftharpoondown` ``\leftharpoondown`` `\rightharpoondown` ``\rightharpoondown`` + `\leftharpoonup` ``\leftharpoonup`` `\rightharpoonup` ``\rightharpoonup`` + `\leftleftarrows` ``\leftleftarrows`` `\rightrightarrows` ``\rightrightarrows`` + `\leftrightarrow` ``\leftrightarrow`` `\Leftrightarrow` ``\Leftrightarrow`` + `\leftrightarrows` ``\leftrightarrows`` `\rightleftarrows` ``\rightleftarrows`` + `\leftrightharpoons` ``\leftrightharpoons`` `\rightleftharpoons` ``\rightleftharpoons`` + `\leftrightsquigarrow` ``\leftrightsquigarrow`` `\rightsquigarrow` ``\rightsquigarrow`` + `\Lleftarrow` ``\Lleftarrow`` `\Rrightarrow` ``\Rrightarrow`` + `\longleftarrow` ``\longleftarrow`` `\longrightarrow` ``\longrightarrow`` + `\Longleftarrow` ``\Longleftarrow`` `\Longrightarrow` ``\Longrightarrow`` + `\longleftrightarrow` ``\longleftrightarrow`` `\Longleftrightarrow` ``\Longleftrightarrow`` + `\looparrowleft` ``\looparrowleft`` `\looparrowright` ``\looparrowright`` + `\Lsh` ``\Lsh`` `\Rsh` ``\Rsh`` + `\mapsto` ``\mapsto`` `\longmapsto` ``\longmapsto`` + `\multimap` ``\multimap`` + `\nleftarrow` ``\nleftarrow`` `\nrightarrow` ``\nrightarrow`` + `\nLeftarrow` ``\nLeftarrow`` `\nRightarrow` ``\nRightarrow`` + `\nleftrightarrow` ``\nleftrightarrow`` `\nLeftrightarrow` ``\nLeftrightarrow`` + `\nwarrow` ``\nwarrow`` `\nearrow` ``\nearrow`` + `\swarrow` ``\swarrow`` `\searrow` ``\searrow`` + `\twoheadleftarrow` ``\twoheadleftarrow`` 
`\twoheadrightarrow` ``\twoheadrightarrow`` + `\upharpoonleft` ``\upharpoonleft`` `\upharpoonright` ``\upharpoonright`` + `\downharpoonleft` ``\downharpoonleft`` `\downharpoonright` ``\downharpoonright`` + `\upuparrows` ``\upuparrows`` `\downdownarrows` ``\downdownarrows`` + ====================== ======================== ===================== ======================= + +Synonyms: `\gets` ``\gets``, `\to` ``\to``, `\restriction` ``\restriction``. + +Comparison +~~~~~~~~~~ + +.. class:: colwidths-auto + +================ ================== ============= =============== ============= =============== =============== ================= +`<` ``<`` `\geq` ``\geq`` `\ll` ``\ll`` `\prec` ``\prec`` +`=` ``=`` `\geqq` ``\geqq`` `\lll` ``\lll`` `\precapprox` ``\precapprox`` +`>` ``>`` `\geqslant` ``\geqslant`` `\lnapprox` ``\lnapprox`` `\preccurlyeq` ``\preccurlyeq`` +`\approx` ``\approx`` `\gg` ``\gg`` `\lneq` ``\lneq`` `\preceq` ``\preceq`` +`\approxeq` ``\approxeq`` `\ggg` ``\ggg`` `\lneqq` ``\lneqq`` `\precnapprox` ``\precnapprox`` +`\asymp` ``\asymp`` `\gnapprox` ``\gnapprox`` `\lnsim` ``\lnsim`` `\precneqq` ``\precneqq`` +`\backsim` ``\backsim`` `\gneq` ``\gneq`` `\ncong` ``\ncong`` `\precnsim` ``\precnsim`` +`\backsimeq` ``\backsimeq`` `\gneqq` ``\gneqq`` `\neq` ``\neq`` `\precsim` ``\precsim`` +`\bumpeq` ``\bumpeq`` `\gnsim` ``\gnsim`` `\ngeq` ``\ngeq`` `\risingdotseq` ``\risingdotseq`` +`\Bumpeq` ``\Bumpeq`` `\gtrapprox` ``\gtrapprox`` `\ngeqq` ``\ngeqq`` `\sim` ``\sim`` +`\circeq` ``\circeq`` `\gtreqless` ``\gtreqless`` `\ngeqslant` ``\ngeqslant`` `\simeq` ``\simeq`` +`\cong` ``\cong`` `\gtreqqless` ``\gtreqqless`` `\ngtr` ``\ngtr`` `\succ` ``\succ`` +`\curlyeqprec` ``\curlyeqprec`` `\gtrless` ``\gtrless`` `\nleq` ``\nleq`` `\succapprox` ``\succapprox`` +`\curlyeqsucc` ``\curlyeqsucc`` `\gtrsim` ``\gtrsim`` `\nleqq` ``\nleqq`` `\succcurlyeq` ``\succcurlyeq`` +`\doteq` ``\doteq`` `\leq` ``\leq`` `\nleqslant` ``\nleqslant`` `\succeq` ``\succeq`` +`\doteqdot` 
``\doteqdot`` `\leqq` ``\leqq`` `\nless` ``\nless`` `\succnapprox` ``\succnapprox`` +`\eqcirc` ``\eqcirc`` `\leqslant` ``\leqslant`` `\nprec` ``\nprec`` `\succneqq` ``\succneqq`` +`\eqsim` ``\eqsim`` `\lessapprox` ``\lessapprox`` `\npreceq` ``\npreceq`` `\succnsim` ``\succnsim`` +`\eqslantgtr` ``\eqslantgtr`` `\lesseqgtr` ``\lesseqgtr`` `\nsim` ``\nsim`` `\succsim` ``\succsim`` +`\eqslantless` ``\eqslantless`` `\lesseqqgtr` ``\lesseqqgtr`` `\nsucc` ``\nsucc`` `\thickapprox` ``\thickapprox`` +`\equiv` ``\equiv`` `\lessgtr` ``\lessgtr`` `\nsucceq` ``\nsucceq`` `\thicksim` ``\thicksim`` +`\fallingdotseq` ``\fallingdotseq`` `\lesssim` ``\lesssim`` `\triangleq` ``\triangleq`` +================ ================== ============= =============== ============= =============== =============== ================= + +The commands ``\lvertneqq`` and ``\gvertneqq`` are not supported by +LaTeX2MathML, as there is no corresponding Unicode character. + +Synonyms: `\ne` ``\ne``, `\le` ``\le``, `\ge` ``\ge``, +`\Doteq` ``\Doteq``, `\llless` ``\llless``, `\gggtr` ``\gggtr``. + +Symbols can be negated by prepending ``\not``, e.g. +`\not=` ``\not=``, `\not\equiv` ``\not\equiv``, +`\not\gtrless` ``\not\gtrless``, `\not\lessgtr` ``\not\lessgtr``. + +Miscellaneous relations +~~~~~~~~~~~~~~~~~~~~~~~ +..
class:: colwidths-auto + + ===================== ======================= =================== ===================== =================== ===================== + `\backepsilon` ``\backepsilon`` `\ntrianglelefteq` ``\ntrianglelefteq`` `\subseteq` ``\subseteq`` + `\because` ``\because`` `\ntriangleright` ``\ntriangleright`` `\subseteqq` ``\subseteqq`` + `\between` ``\between`` `\ntrianglerighteq` ``\ntrianglerighteq`` `\subsetneq` ``\subsetneq`` + `\blacktriangleleft` ``\blacktriangleleft`` `\nvdash` ``\nvdash`` `\subsetneqq` ``\subsetneqq`` + `\blacktriangleright` ``\blacktriangleright`` `\nVdash` ``\nVdash`` `\supset` ``\supset`` + `\bowtie` ``\bowtie`` `\nvDash` ``\nvDash`` `\Supset` ``\Supset`` + `\dashv` ``\dashv`` `\nVDash` ``\nVDash`` `\supseteq` ``\supseteq`` + `\frown` ``\frown`` `\parallel` ``\parallel`` `\supseteqq` ``\supseteqq`` + `\in` ``\in`` `\perp` ``\perp`` `\supsetneq` ``\supsetneq`` + `\mid` ``\mid`` `\pitchfork` ``\pitchfork`` `\supsetneqq` ``\supsetneqq`` + `\models` ``\models`` `\propto` ``\propto`` `\therefore` ``\therefore`` + `\ni` ``\ni`` `\shortmid` ``\shortmid`` `\trianglelefteq` ``\trianglelefteq`` + `\nmid` ``\nmid`` `\shortparallel` ``\shortparallel`` `\trianglerighteq` ``\trianglerighteq`` + `\notin` ``\notin`` `\smallfrown` ``\smallfrown`` `\varpropto` ``\varpropto`` + `\nparallel` ``\nparallel`` `\smallsmile` ``\smallsmile`` `\vartriangle` ``\vartriangle`` + `\nshortmid` ``\nshortmid`` `\smile` ``\smile`` `\vartriangleleft` ``\vartriangleleft`` + `\nshortparallel` ``\nshortparallel`` `\sqsubset` ``\sqsubset`` `\vartriangleright` ``\vartriangleright`` + `\nsubseteq` ``\nsubseteq`` `\sqsubseteq` ``\sqsubseteq`` `\vdash` ``\vdash`` + `\nsubseteqq` ``\nsubseteqq`` `\sqsupset` ``\sqsupset`` `\Vdash` ``\Vdash`` + `\nsupseteq` ``\nsupseteq`` `\sqsupseteq` ``\sqsupseteq`` `\vDash` ``\vDash`` + `\nsupseteqq` ``\nsupseteqq`` `\subset` ``\subset`` `\Vvdash` ``\Vvdash`` + `\ntriangleleft` ``\ntriangleleft`` `\Subset` ``\Subset`` + 
===================== ======================= =================== ===================== =================== ===================== + +Synonyms: `\owns` ``\owns``. + +Symbols can be negated prepending ``\not``, e.g. +`\not\in` ``\not\in``, `\not\ni` ``\not\ni``. + +The commands ``\varsubsetneq``, ``\varsubsetneqq``, ``\varsupsetneq``, +and ``\varsupsetneqq`` are not supported by LaTeX2MathML, as there is no +corresponding Unicode character. + +Variable-sized operators +------------------------ +.. class:: colwidths-auto + + ========================= ========================= ========================= =========================== + `\sum` ``\sum`` `\prod` ``\prod`` `\bigcap` ``\bigcap`` `\bigodot` ``\bigodot`` + `\int` ``\int`` `\coprod` ``\coprod`` `\bigcup` ``\bigcup`` `\bigoplus` ``\bigoplus`` + `\oint` ``\oint`` `\bigwedge` ``\bigwedge`` `\biguplus` ``\biguplus`` `\bigotimes` ``\bigotimes`` + `\smallint` ``\smallint`` `\bigvee` ``\bigvee`` `\bigsqcup` ``\bigsqcup`` + ========================= ========================= ========================= =========================== + +Larger symbols are used in displayed formulas, sum-like symbols have +indices above/below the symbol (see also `scripts and limits`_): + +.. math:: \sum_{n=1}^N a_n \qquad + \int_0^1f(x)\,dx \qquad + \prod_{i=1}^{10} b_i \ldots + +Notations +========= + +Top and bottom embellishments +----------------------------- + +See `Accents and embellishments`_. + +Extensible arrows +----------------- + +``\xleftarrow`` and ``\xrightarrow`` produce arrows that extend automatically to +accommodate unusually wide subscripts or superscripts. These commands +take one optional argument (the subscript) and one mandatory argument +(the superscript, possibly empty):: + + A \xleftarrow{n+\mu-1} B \xrightarrow[T]{n\pm i-1} C + +results in + +.. 
math:: A \xleftarrow{n+\mu-1} B \xrightarrow[T]{n\pm i-1} C + +Affixing symbols to other symbols +--------------------------------- + +In addition to the standard `accents and embellishments`_, other symbols +can be placed above or below a base symbol with the ``\overset`` and +``\underset`` commands. The symbol is set in "scriptstyle" (smaller font +size). For example, writing ``\overset{*}{X}`` becomes `\overset{*}{X}` +and ``\underset{+}{M}`` becomes `\underset{+}{M}`. + + +Matrices +-------- + +The ``matrix`` and ``cases`` environments can also contain ``\\`` and +``&``:: + + .. math:: + \left ( \begin{matrix} a & b \\ c & d \end{matrix}\right) + +Result: + +.. math:: + \left ( \begin{matrix} a & b \\ c & d \end{matrix} \right) + +The environments ``pmatrix``, ``bmatrix``, ``Bmatrix``, ``vmatrix``, and +``Vmatrix`` have (respectively) ( ), [ ], { }, \| \|, and `\Vert\ \Vert` +delimiters built in, e.g. + +.. math:: \begin{pmatrix} a & b \\ c & d \end{pmatrix} \qquad + \begin{bmatrix} a & b \\ c & d \end{bmatrix} \qquad + \begin{Vmatrix} a & b \\ c & d \end{Vmatrix} + +To produce a small matrix suitable for use in text, there is a +``smallmatrix`` environment +`\bigl(\begin{smallmatrix} a & b \\ c & d \end{smallmatrix}\bigr)` +that comes closer to fitting within a single text line than a normal +matrix. + + +For piecewise function definitions there is a ``cases`` environment: + +.. math:: \mathrm{sgn}(x) = \begin{cases} + -1 & x<0\\ + \phantom{-}1 & x>0 + \end{cases} + +Spacing commands +---------------- + +Horizontal spacing of elements can be controlled with the following +commands: + +.. 
class:: colwidths-auto + + ====================== ======== ===================== ================== + :m:`3\qquad 4` ``3\qquad 4`` = 2em + :m:`3\quad 4` ``3\quad 4`` = 1em + :m:`3~4` ``3~4`` ``3\nobreakspace 4`` + :m:`3\ 4` ``3\ 4`` escaped space + :m:`3\;4` ``3\;4`` ``3\thickspace 4`` + :m:`3\:4` ``3\:4`` ``3\medspace 4`` + :m:`3\,4` ``3\,4`` ``3\thinspace 4`` + :m:`3 4` ``3 4`` regular space [#]_ + :m:`3\!4` ``3\!4`` ``3\negthinspace 4`` + :m:`3\negmedspace 4` ``3\negmedspace 4`` + :m:`3\negthickspace 4` ``3\negthickspace 4`` + `3\hspace{1ex}4` ``3\hspace{1ex}4`` custom length + `3\mspace{20mu}4` ``3\mspace{20mu}4`` custom length [#]_ + ====================== ======== ===================== ================== + +.. [#] Whitespace characters are ignored in LaTeX math mode. +.. [#] Unit must be 'mu' (1 mu = 1/18em). + +Negative spacing does not work with MathML (in Firefox 78). + +There are also three commands that leave a space equal to the height and +width of its argument. For example ``\phantom{XXX}`` results in space as +wide and high as three X’s: + +.. math:: \frac{\phantom{XXX}+1}{XXX-1} + +The commands ``\hphantom`` and ``\vphantom`` insert space with the +width or height of the argument. They are not supported with `math_output`_ +MathML. + +Roots +----- + +.. class:: colwidths-auto + + ========= ==================== ================== + command example result + ========= ==================== ================== + ``\sqrt`` ``\sqrt{x^2-1}`` `\sqrt{x^2-1}` + .. ``\sqrt[3n]{x^2-1}`` `\sqrt[3n]{x^2-1}` + .. ``\sqrt\frac{1}{2}`` `\sqrt\frac{1}{2}` + ========= ==================== ================== + +Boxed formulas +-------------- + +The command ``\boxed`` puts a box around its argument: + +.. math:: \boxed{\eta \leq C(\delta(\eta) +\Lambda_M(0,\delta))} + + + +Fractions and related constructions +=================================== + +The ``\frac`` command takes two arguments, numerator and denominator, +and typesets them in normal fraction form. 
For example, ``U = \frac{R}{I}`` +produces `U = \frac{R}{I}`. Use ``\dfrac`` or ``\tfrac`` to +force display style and text style respectively. + +.. math:: \frac{x+1}{x-1} \quad + \dfrac{x+1}{x-1} \quad + \tfrac{x+1}{x-1} + +and in text: `\frac{x+1}{x-1}`, `\dfrac{x+1}{x-1}`, `\tfrac{x+1}{x-1}`. + +For binomial expressions such as `\binom{n}{k}`, +there are ``\binom``, ``\dbinom`` and ``\tbinom`` commands:: + + 2^k-\binom{k}{1}2^{k-1}+\binom{k}{2}2^{k-2} + +prints + +.. math:: 2^k-\binom{k}{1}2^{k-1}+\binom{k}{2}2^{k-2} + +The ``\cfrac`` command for continued fractions uses displaystyle and +padding for sub-fractions: + +.. math:: \frac{\pi}{4} = 1 + \cfrac{1^2}{ + 2 + \cfrac{3^2}{ + 2 + \cfrac{5^2}{ + 2 + \cfrac{7^2}{2 + \cdots} + }}} + \qquad \text{vs.}\qquad + \frac{\pi}{4} = 1 + \frac{1^2}{ + 2 + \frac{3^2}{ + 2 + \frac{5^2}{ + 2 + \frac{7^2}{2 + \cdots} + }}} + +It supports the optional argument ``[l]`` or ``[r]`` for +left or right placement of the numerator: + +.. math:: \cfrac[l]{x}{x-1} \quad + \cfrac{x}{x-1} \quad + \cfrac[r]{x}{x-1} + + +Delimiter sizes +=============== + +Besides the automatic scaling of `extensible delimiters`_ with ``\left`` +and ``\right``, there are four commands to manually select delimiters of +fixed size: + +.. 
class:: colwidths-auto + + ========= ============== ============== ============== ============== =============== =============== + Sizing no ``\left`` ``\bigl`` ``\Bigl`` ``\biggl`` ``\Biggl`` + command ``\right`` ``\bigr`` ``\Bigr`` ``\biggr`` ``\Biggr`` + --------- -------------- -------------- -------------- -------------- --------------- --------------- + Result `\displaystyle `\displaystyle `\displaystyle `\displaystyle `\displaystyle `\displaystyle + (b) \left(b\right) \bigl(b\bigr) \Bigl(b\Bigr) \biggl(b\biggr) \Biggl(b\Biggr) + (\frac{c}{d})` \left(\frac{c} \bigl(\frac{c} \Bigl(\frac{c} \biggl(\frac{c} \Biggl(\frac{c} + {d}\right)` {d}\bigr)` {d}\Bigr)` {d}\biggr)` {d}\Biggr)` + ========= ============== ============== ============== ============== =============== =============== + +There are two or three situations where the delimiter size is commonly +adjusted using these commands: + +The first kind of adjustment is done for cumulative operators with +limits, such as summation signs. With ``\left`` and ``\right`` the +delimiters usually turn out larger than necessary, and using the ``Big`` +or ``bigg`` sizes instead gives better results: + +.. math:: + \left[\sum_i a_i\left\lvert\sum_j x_{ij}\right\rvert^p\right]^{1/p} + \text{ versus } + \biggl[\sum_i a_i\Bigl\lvert\sum_j x_{ij}\Bigr\rvert^p\biggr]^{1/p} + +The second kind of situation is clustered pairs of delimiters, where +\left and \right make them all the same size (because that is adequate to +cover the encompassed material), but what you really want is to make some +of the delimiters slightly larger to make the nesting easier to see. + +.. 
math:: \left((a_1 b_1) - (a_2 b_2)\right) + \left((a_2 b_1) + (a_1 b_2)\right) + \quad\text{versus}\quad + \bigl((a_1 b_1) - (a_2 b_2)\bigr) + \bigl((a_2 b_1) + (a_1 b_2)\bigr) + +The third kind of situation is a slightly oversize object in running +text, such as `\left|\frac{b'}{d'}\right|` where the delimiters produced +by ``\left`` and ``\right`` cause too much line spreading. [#]_ In that case +``\bigl`` and ``\bigr`` can be used to produce delimiters that are larger +than the base size but still able to fit within the normal line spacing: +`\bigl|\frac{b'}{d'}\bigr|`. + +.. [#] With MathML, an example would be parentheses + around a ``smallmatrix`` environment + `\left(\begin{smallmatrix} a & b \\ c & d \end{smallmatrix}\right)` + vs. `\Bigl(\begin{smallmatrix} a & b \\ c & d \end{smallmatrix}\Bigr)`. + +Text +==== + +The main use of the command ``\text`` is for words or phrases in a +display. It is similar to ``\mbox`` in its effects but, unlike ``\mbox``, +automatically produces subscript-size text if used in a subscript, +``k_{\text{B}}T`` becomes `k_{\text{B}}T`. + +Whitespace is kept inside the argument: + +.. Math:: f_{[x_{i-1},x_i]} \text{ is monotonic for } i = 1,\,…,\,c+1 + + +The text may contain math commands wrapped in ``$`` signs, e.g. + +.. math:: (-1)^{n_i} = \begin{cases} -1 \quad \text{if $n_i$ is odd,} \\ + +1 \quad \text{if $n_i$ is even.} + \end{cases} + +.. TODO ignore {}, handle text-mode commands + +  +.. TODO: ``\mod`` and its relatives + -------------------------- + + Commands ``\mod``, ``\bmod``, ``\pmod``, ``\pod`` deal with the special + spacing conventions of “mod” notation. ``\mod`` and ``\pod`` are + variants of ``\pmod`` preferred by some authors; ``\mod`` omits the + parentheses, whereas ``\pod`` omits the “mod” and retains the + parentheses. 
+ + \gcd(n,m\bmod n) ;\quad x\equiv y\pmod b + ;\quad x\equiv y\mod c ;\quad x\equiv y\pod d + + +Integrals and sums +================== + +The limits on integrals, sums, and similar symbols are placed either to +the side of or above and below the base symbol, depending on convention +and context. In inline formulas and fractions, the limits on sums, and +similar symbols like + +.. math:: \lim_{n\to\infty} \sum_1^n \frac{1}{n} + +move to index positions: `\lim_{n\to\infty} \sum_1^n \frac{1}{n}`. + +Altering the placement of limits +-------------------------------- + +The commands ``\intop`` and ``\ointop`` produce integral signs with +limits as in sums and similar: `\intop_0^1`, `\ointop_c` and + +.. math:: \intop_0^1 \quad \ointop_c + \quad \text{vs.} \quad + \int^1_0 \quad \oint_c + +The commands ``\limits`` and ``\nolimits`` override the default placement +of the limits for any operator; ``\displaylimits`` forces standard +positioning as for the \sum command. They should follow immediately after +the operator to which they apply. + +Compare the same term with default positions, ``\limits``, and +``\nolimits`` in inline and display mode: `\lim_{x\to0}f(x)`, +`\lim\limits_{x\to0}f(x)`, `\lim\nolimits_{x\to0}f(x)`, vs. + +.. math:: \lim_{x\to0}f(x), \quad + \lim\limits_{x\to0}f(x) \quad + \lim\nolimits_{x\to0}f(x). + +.. TODO: \substack + +.. TODO: \sideset + + +Changing the size of elements in a formula +========================================== + +The declarations [#]_ ``\displaystyle``, ``\textstyle``, +``\scriptstyle``, and ``\scriptscriptstyle``, select a symbol size and +spacing that would be applied in (respectively) display math, inline +math, first-order subscript, or second-order subscript, even when the +current context would normally yield some other size. 
+ +For example ``:math:`\displaystyle \sum_{n=0}^\infty +\frac{1}{n}``` is printed as `\displaystyle \sum_{n=0}^\infty \frac{1}{n}` +rather than `\sum_{n=0}^\infty \frac{1}{n}` and :: + + \frac{\scriptstyle\sum_{n > 0} z^n} + {\displaystyle\prod_{1\leq k\leq n} (1-q^k)} + +yields + +.. math:: + + \frac{\scriptstyle\sum_{n > 0} z^n} + {\displaystyle\prod_{1\leq k\leq n} (1-q^k)} + \text{ instead of the default } + \frac{\sum_{n > 0} z^n} + {\prod_{1\leq k\leq n} (1-q^k)}. + +.. [#] "Declarations" are commands that affect processing of the current + "group". In particular, notice where the braces fall that delimit the + effect of the command: Right: ``{\displaystyle ...}`` Wrong: + ``\displaystyle{...}``. + + With math_output_ MathML, the declaration must be the first element + after the opening bracket. + + +Appendix +======== + +Tests +----- + + +Font changes +~~~~~~~~~~~~ + +Math alphabet macros change the default alphabet ("mathvariant" in +MathML), leaving some symbols unchanged: + +:normal: `abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R` +:mathrm: `\mathrm{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathit: `\mathit{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathsf: `\mathsf{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathbb: `\mathbb{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathbf: `\mathbf{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathcal: `\mathcal{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` +:mathscr: `\mathscr{abs(x) \pm \alpha \approx 3 \Gamma \quad \forall x \in R}` + +Unicode supports the following blackboard-bold characters: +`\mathbb{a \ldots z A \ldots Z 0 \ldots 9 +\mathbb\Gamma \mathbb{\Pi} \mathbb {\Sigma} \mathbb\gamma \mathbb\pi}`. 
+ + +Inferred <mrow>s in MathML +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>, <menclose>, +<mtd>, <mscarry>, and <math> treat their contents as a single inferred mrow +formed from all their children. + +.. math:: a = \sqrt 2 + x,\quad + b = \sqrt{1+x^2},\quad + c = \sqrt\frac{\sin(x)}{23}, + +inline: :math:`a = \sqrt 2 + x, b = \sqrt{1+x^2}, c = \sqrt\frac{\sin(x)}{23}`. + + +Scripts and Limits +~~~~~~~~~~~~~~~~~~ + +Accents should be nearer to the base (in MathML Firefox 78, it's vice versa!): +`\bar a \overline a, \bar l \overline l, \bar i \overline i`, +`\vec{r}` `\overrightarrow{r}`. + +Sub- and superscript may be given in any order: +`x_i^j = x^j_i` and `\int_0^1 = \int^1_0`. + +Double exponent: `x^{10^4}`, `r_{T_\mathrm{in}}` and `x_i^{n^2}`. + + +Nested groups +~~~~~~~~~~~~~ + +tex-token returns "{" for nested groups: + +.. math:: \text{das ist ein {toller} text (unescaped \{ and \} is + ignored by LaTeX)} + +Big delimiters and symbols +~~~~~~~~~~~~~~~~~~~~~~~~~~ +Compare automatic sizing with fixed sizes: + +.. math: \left( \frac{\frac1x}{\frac{1}{n}}\right) &= \Biggl(\text{Bigg}\Biggr)\\ + + +.. 
math:: + \left( 3 \right) + \left( f(x) \right) + \left( \bar x \right) + \left( \overline x \right) + \left( n_i \right) &= () \\ + \left( \underline x \right) &= \bigl(\text{big}\bigr)\\ + \left( 3^2 \right) + \left( \sqrt{3} \right) + \left( \sqrt{3^2} \right) + \left( \sum \right) + \left( \bigotimes \right) + \left( \prod \right) &= \Bigl(\text{Big}\Bigr)\\ + \left( \frac{3 }{2} \right) + \left( \frac{3^2}{2^4} \right) + \binom{3 }{2} + \begin{pmatrix} a & b \\ c & d \end{pmatrix} + \left( \frac{1}{\sqrt 2} \right) + \left( \int \right) + \left( \int_0 \right) + \left( \int^1 \right) + \left( \int_0^1 \right) &= \biggl(\text{bigg}\biggr)\\ + \left( \frac{\sqrt 2}{2} \right) + \left( \sum_0 \right) + \left( \sum^1 \right) + \left( \sum_0^1 \right) + \left( \frac{\frac1x}{\frac{1}{n}}\right) &= \Biggl(\text{Bigg}\Biggr)\\ + \left( \intop_0 \right) + \left( \intop^1 \right) + \left( \intop_0^1 \right) + +And in text: + +:`()`: `\left(3 \right) + \left( f(x) \right) + \left( \bar x \right) + \left( \overline x \right) + \left( n_i \right) + \left( \sum \right) + \left( \sum_0 \right) + \left( \prod \right)` + + +:`\bigl(\text{big}\bigr)`: `\left(\underline x \right) + \left( 3^2 \right) + \binom{3}{2} + \left(\begin{smallmatrix} a & b \\ + c & d \end{smallmatrix} \right) + \left( \bigotimes \right)` + +:`\Bigl(\text{Big}\Bigr)`: `\left(\sqrt{3} \right) + \left( \sqrt{3^2} \right) + \left( \frac{3}{2} \right) + \left( \frac{3^2}{2^4} \right) + \left( \frac{\sqrt 2}{2} \right) + \left( \int \right) + \left( \int_0 \right) + \left( \int^1 \right) + \left( \int_0^1 \right) + \left( \sum^1 \right) + \left( \sum_0^1 \right) + \left( \frac{\frac1x}{\frac{1}{n}}\right)` + + + + + +Test ``\left``, ``\right``, and the \bigl/\bigr, … size commands +with all extensible delimiters. + +.. 
math:: + \left.(b\right)\ \bigl(b\Bigr)\ \biggl(b\Biggr) + \quad + \left.[b\right]\ \bigl[b\Bigr]\ \biggl[b\Biggr] + \quad + \left.\{b\right \} \ \bigl\{b\Bigr \} \ \biggl\{b\Biggr \} + \quad + \left.\langle b\right\rangle\ \bigl\langle b\Bigr\rangle\ \biggl\langle b\Biggr\rangle + + \left.\lceil b\right\rceil\ \bigl\lceil b\Bigr\rceil\ \biggl\lceil b\Biggr\rceil + \quad + \left.\lfloor b\right\rfloor\ \bigl\lfloor b\Bigr\rfloor\ \biggl\lfloor b\Biggr\rfloor + \quad + \left.\lvert b\right\rvert\ \bigl\lvert b\Bigr\rvert\ + \biggl\lvert b\Biggr\rvert + \quad + \left.\lVert b\right\rVert\ \bigl\lVert b\Bigr\rVert\ + \biggl\lVert b\Biggr\rVert + + \left.\lgroup b\right\rgroup\ \bigl\lgroup b\Bigr\rgroup\ \biggl\lgroup b\Biggr\rgroup + \quad + \left.\lmoustache b\right\rmoustache\ \bigl\lmoustache b\Bigr\rmoustache\ \biggl\lmoustache b\Biggr\rmoustache + \quad + \left./b\right\backslash\ \bigl/b\Bigr\backslash\ \biggl/b\Biggr\backslash + + \left.|b\right\|\ \bigl|b\Bigr\|\ \biggl|b\Biggr\| + \quad + \left.\vert b\right\Vert\ \bigl\vert b\Bigr\Vert\ \biggl\vert b\Biggr\Vert + \quad + \left.\arrowvert b\right\Arrowvert\ \bigl\arrowvert b\Bigr\Arrowvert\ \biggl\arrowvert b\Biggr\Arrowvert + \quad + \left.\bracevert b\right\bracevert\ \bigl\bracevert b\Bigr\bracevert\ \biggl\bracevert b\Biggr\bracevert + \quad + \left.\vert b\right\Vert\ \bigl\vert b\Bigr\Vert\ \biggl\vert b\Biggr\Vert + + +Variable-sized operators: + +Inline: `\int\ \iint\ \iiint\ \iiiint\ \idotsint \oint\ \smallint\ +\sum\ \prod\ \coprod\ \bigwedge\ \bigvee\ \bigcap\ \bigcup\ \biguplus\ +\bigsqcup\ \bigodot\ \bigoplus\ \bigotimes` and Display: + +.. math:: \int\ \iint\ \iiint\ \iiiint\ \idotsint\ \oint\ \smallint\ + \sum\ \prod\ \coprod\ \bigwedge\ \bigvee\ \bigcap\ \bigcup\ + \biguplus\ \bigsqcup\ \bigodot\ \bigoplus\ \bigotimes + +.. 
math:: \int_1 f\ \intop_1 f\ \iint_1 f\ \smallint_1 f\ \sum_1\ + \prod_1\ \bigwedge_1\ \bigcap_1\ \biguplus_1\ \bigodot_1\ \int^N\ + \intop^N\ \iiiint^N\ \oint^N\ \smallint^N\ \sum^N\ \coprod^N\ + \bigvee^N\ \bigcup^N\ \bigsqcup^N\ \bigotimes^N + +.. math:: \int_1^N\ \intop_1^N\ \iint_1^N\ \iiint_1^N\ \iiiint_1^N\ + \idotsint_1^N\ \oint_1^N\ \smallint_1^N\ \sum_1^N\ \prod_1^N\ + \coprod_1^N\ \bigwedge_1^N\ \bigvee_1^N\ \bigcap_1^N\ \bigcup_1^N + \ \biguplus_1^N\ \bigsqcup_1^N\ \bigodot_1^N\ \bigoplus_1^N\ + \bigotimes_1^N + + +Text +~~~~ + +The text may contain non-ASCII characters: `n_\text{Stoß}`. + +Some text-mode LaTeX commands are supported with math_output_ "html". +In other output formats, use literal Unicode: `\text{ç é è ë ê ñ ů ž ©}` +to get the result of the accent macros +`\text{\c{c} \'e \`e \"e \^e \~n \r{u} \v{z} \textcircled{c}}`. diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/restructuredtext.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/restructuredtext.txt new file mode 100644 index 00000000..e28a5946 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/restructuredtext.txt @@ -0,0 +1,3267 @@ +.. -*- coding: utf-8 -*- + +======================================= + reStructuredText Markup Specification +======================================= + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. Note:: + + This document is a detailed technical specification; it is not a + tutorial or a primer. If this is your first exposure to + reStructuredText, please read `A ReStructuredText Primer`_ and the + `Quick reStructuredText`_ user reference first. + +.. _A ReStructuredText Primer: ../../user/rst/quickstart.html +.. 
_Quick reStructuredText: ../../user/rst/quickref.html + + +reStructuredText_ is plaintext that uses simple and intuitive +constructs to indicate the structure of a document. These constructs +are equally easy to read in raw and processed forms. This document is +itself an example of reStructuredText (raw, if you are reading the +text file, or processed, if you are reading an HTML document, for +example). The reStructuredText parser is a component of Docutils_. + +Simple, implicit markup is used to indicate special constructs, such +as section headings, bullet lists, and emphasis. The markup used is +as minimal and unobtrusive as possible. Less often-used constructs +and extensions to the basic reStructuredText syntax may have more +elaborate or explicit markup. + +reStructuredText is applicable to documents of any length, from the +very small (such as inline program documentation fragments, e.g. +Python docstrings) to the quite large (this document). + +The first section gives a quick overview of the syntax of the +reStructuredText markup by example. A complete specification is given +in the `Syntax Details`_ section. + +`Literal blocks`_ (in which no markup processing is done) are used for +examples throughout this document, to illustrate the plaintext markup. + + +.. contents:: + + +----------------------- + Quick Syntax Overview +----------------------- + +A reStructuredText document is made up of body or block-level +elements, and may be structured into sections. Sections_ are +indicated through title style (underlines & optional overlines). +Sections contain body elements and/or subsections. Some body elements +contain further elements, such as lists containing list items, which +in turn may contain paragraphs and other body elements. Others, such +as paragraphs, contain text and `inline markup`_ elements. 
+ +Here are examples of `body elements`_: + +- Paragraphs_ (and `inline markup`_):: + + Paragraphs contain text and may contain inline markup: + *emphasis*, **strong emphasis**, `interpreted text`, ``inline + literals``, standalone hyperlinks (https://www.python.org), + external hyperlinks (Python_), internal cross-references + (example_), footnote references ([1]_), citation references + ([CIT2002]_), substitution references (|example|), and _`inline + internal targets`. + + Paragraphs are separated by blank lines and are left-aligned. + +- Five types of lists: + + 1. `Bullet lists`_:: + + - This is a bullet list. + + - Bullets can be "*", "+", or "-". + + 2. `Enumerated lists`_:: + + 1. This is an enumerated list. + + 2. Enumerators may be arabic numbers, letters, or roman + numerals. + + 3. `Definition lists`_:: + + what + Definition lists associate a term with a definition. + + how + The term is a one-line phrase, and the definition is one + or more paragraphs or body elements, indented relative to + the term. + + 4. `Field lists`_:: + + :what: Field lists map field names to field bodies, like + database records. They are often part of an extension + syntax. + + :how: The field marker is a colon, the field name, and a + colon. + + The field body may contain one or more body elements, + indented relative to the field marker. + + 5. `Option lists`_, for listing command-line options:: + + -a command-line option "a" + -b file options can have arguments + and long descriptions + --long options can be long also + --input=file long options can also have + arguments + /V DOS/VMS-style options too + + There must be at least two spaces between the option and the + description. 
+ +- `Literal blocks`_:: + + Literal blocks are either indented or line-prefix-quoted blocks, + and indicated with a double-colon ("::") at the end of the + preceding paragraph (right here -->):: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + +- `Block quotes`_:: + + Block quotes consist of indented body elements: + + This theory, that is mine, is mine. + + -- Anne Elk (Miss) + +- `Doctest blocks`_:: + + >>> print 'Python-specific usage examples; begun with ">>>"' + Python-specific usage examples; begun with ">>>" + >>> print '(cut and pasted from interactive Python sessions)' + (cut and pasted from interactive Python sessions) + +- Two syntaxes for tables_: + + 1. `Grid tables`_; complete, but complex and verbose:: + + +------------------------+------------+----------+ + | Header row, column 1 | Header 2 | Header 3 | + +========================+============+==========+ + | body row 1, column 1 | column 2 | column 3 | + +------------------------+------------+----------+ + | body row 2 | Cells may span | + +------------------------+-----------------------+ + + 2. `Simple tables`_; easy and compact, but limited:: + + ==================== ========== ========== + Header row, column 1 Header 2 Header 3 + ==================== ========== ========== + body row 1, column 1 column 2 column 3 + body row 2 Cells may span columns + ==================== ====================== + +- `Explicit markup blocks`_ all begin with an explicit block marker, + two periods and a space: + + - Footnotes_:: + + .. [1] A footnote contains body elements, consistently + indented by at least 3 spaces. + + - Citations_:: + + .. [CIT2002] Just like a footnote, except the label is + textual. + + - `Hyperlink targets`_:: + + .. _Python: https://www.python.org + + .. _example: + + The "_example" target above points to this paragraph. + + - Directives_:: + + .. image:: mylogo.png + + - `Substitution definitions`_:: + + .. 
|symbol here| image:: symbol.png + + - Comments_:: + + .. Comments begin with two dots and a space. Anything may + follow, except for the syntax of footnotes/citations, + hyperlink targets, directives, or substitution definitions. + + +---------------- + Syntax Details +---------------- + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to syntax +constructs. For details on the hierarchy of elements, please see `The +Docutils Document Tree`_ and the `Docutils Generic DTD`_ XML document +type definition. + + +Whitespace +========== + +Spaces are recommended for indentation_, but tabs may also be used. +Tabs will be converted to spaces. Tab stops are at every 8th column +(processing systems may make this value configurable). + +Other whitespace characters (form feeds [chr(12)] and vertical tabs +[chr(11)]) are converted to single spaces before processing. + + +Blank Lines +----------- + +Blank lines are used to separate paragraphs and other elements. +Multiple successive blank lines are equivalent to a single blank line, +except within literal blocks (where all whitespace is preserved). +Blank lines may be omitted when the markup makes element separation +unambiguous, in conjunction with indentation. The first line of a +document is treated as if it is preceded by a blank line, and the last +line of a document is treated as if it is followed by a blank line. + + +Indentation +----------- + +Indentation is used to indicate -- and is only significant in +indicating -- block quotes, definitions (in `definition lists`_), +and local nested content: + +- list item content (multi-line contents of list items, and multiple + body elements within a list item, including nested lists), +- the content of `literal blocks`_, and +- the content of `explicit markup blocks`_ (directives, footnotes, ...). 
+ +Any text whose indentation is less than that of the current level +(i.e., unindented text or "dedents") ends the current level of +indentation. + +Since all indentation is significant, the level of indentation must be +consistent. For example, indentation is the sole markup indicator for +`block quotes`_:: + + This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + Paragraph 2 of the first-level block quote. + +Multiple levels of indentation within a block quote will result in +more complex structures:: + + This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + This paragraph belongs to a second-level block quote. + + Another top-level paragraph. + + This paragraph belongs to a second-level block quote. + + This paragraph belongs to a first-level block quote. The + second-level block quote above is inside this first-level + block quote. + +When a paragraph or other construct consists of more than one line of +text, the lines must be left-aligned:: + + This is a paragraph. The lines of + this paragraph are aligned at the left. + + This paragraph has problems. The + lines are not left-aligned. In addition + to potential misinterpretation, warning + and/or error messages will be generated + by the parser. + +Several constructs begin with a marker, and the body of the construct +must be indented relative to the marker. For constructs using simple +markers (`bullet lists`_, `enumerated lists`_), the level of +indentation of the body is determined by the position of the first +line of text. For example:: + + - This is the first line of a bullet list + item's paragraph. All lines must align + relative to the first line. + + This indented paragraph is interpreted + as a block quote. + + Another paragraph belonging to the first list item. + + Because it is not sufficiently indented, + this paragraph does not belong to the list + item (it's a block quote following the list). 
+ +The body of `explicit markup blocks`_, `field lists`_, and `option +lists`_ ends above the first line with the same or less indentation +than the marker. For example, field lists may have very long markers +(containing the field names):: + + :Hello: This field has a short field name, so aligning the field + body with the first line is feasible. + + :Number-of-African-swallows-required-to-carry-a-coconut: It would + be very difficult to align the field body with the left edge + of the first line. It may even be preferable not to begin the + body on the same line as the marker. + + +.. _escape: + +Escaping Mechanism +================== + +The character set universally available to plaintext documents, 7-bit +ASCII, is limited. No matter what characters are used for markup, +they will already have multiple meanings in written text. Therefore +markup characters will sometimes appear in text without being +intended as markup. Any serious markup system requires an escaping +mechanism to override the default meaning of the characters used for +the markup. In reStructuredText we use the *backslash*, commonly used +as an escaping character in other domains. + +A backslash (``\``) escapes the following character. + +* "Escaping" backslash characters are represented by NULL characters in + the `Document Tree`_ and removed from the output document by the + Docutils writers_. + +* Escaped non-white characters are prevented from playing a role in any + markup interpretation. The escaped character represents the character + itself. (A literal backslash can be specified by two backslashes in a + row -- the first backslash escapes the second. [#caveat]_) + +* Escaped whitespace characters are removed from the output document + together with the escaping backslash. This allows for `character-level + inline markup`_. + + In `URI context` [#uri-context]_, backslash-escaped whitespace + represents a single space. 
+ +Backslashes have no special meaning in `literal context` [#literal-context]_. +Here, a single backslash represents a literal backslash, without having +to double up. [#caveat]_ + +.. [#caveat] Please note that the reStructuredText specification and + parser do not address the issue of the representation or extraction of + text input (how and in what form the text actually *reaches* the + parser). Backslashes and other characters may serve a + character-escaping purpose in certain contexts and must be dealt with + appropriately. For example, Python uses backslashes in string + literals to escape certain characters. The simplest solution when + backslashes appear in Python docstrings is to use raw docstrings:: + + r"""This is a raw docstring. Backslashes (\) are not touched.""" + +.. [#uri-context] In contexts where Docutils expects a URI (the link + block of `external hyperlink targets`_ or the argument of an image_ or + figure_ directive), whitespace is ignored by default + +.. [#literal-context] In literal context (`literal blocks`_ and `inline + literals`_, content of the code_, math_, and raw_ directives, content + of the `"raw" role`_ and `custom roles`_ based on it), + reStructuredText markup characters lose their semantics so there is no + reason to escape them. + +.. _reference name: + +Reference Names +=============== + +`Reference names` identify elements for cross-referencing. + +.. Note:: References to a target position in external, generated documents + must use the auto-generated `identifier key`_ which may differ from the + `reference name` due to restrictions on identifiers/labels in the + output format. + +Simple reference names are single words consisting of alphanumerics +plus isolated (no two adjacent) internal hyphens, underscores, +periods, colons and plus signs; no whitespace or other characters are +allowed. 
Footnote labels (Footnotes_ & `Footnote References`_), citation +labels (Citations_ & `Citation References`_), `interpreted text`_ roles, +and some `hyperlink references`_ use the simple reference name syntax. + +Reference names using punctuation or whose names are phrases (two or +more space-separated words) are called "phrase-references". +Phrase-references are expressed by enclosing the phrase in backquotes +and treating the backquoted text as a reference name:: + + Want to learn about `my favorite programming language`_? + + .. _my favorite programming language: https://www.python.org + +Simple reference names may also optionally use backquotes. + +.. _`normalized reference names`: + +Reference names are whitespace-neutral and case-insensitive. When +resolving reference names internally: + +- whitespace is normalized (one or more spaces, horizontal or vertical + tabs, newlines, carriage returns, or form feeds, are interpreted as + a single space), and + +- case is normalized (all alphabetic characters are converted to + lowercase). + +For example, the following `hyperlink references`_ are equivalent:: + + - `A HYPERLINK`_ + - `a hyperlink`_ + - `A + Hyperlink`_ + +Hyperlinks_, footnotes_, and citations_ all share the same namespace +for reference names. The labels of citations (simple reference names) +and manually-numbered footnotes (numbers) are entered into the same +database as other hyperlink names. This means that a footnote_ +(defined as "``.. [#note]``") which can be referred to by a footnote +reference (``[#note]_``), can also be referred to by a plain hyperlink +reference (``note_``). Of course, each type of reference (hyperlink, +footnote, citation) may be processed and rendered differently. Some +care should be taken to avoid reference name conflicts. + + +Document Structure +================== + +Document +-------- + +Doctree element: `document <document element_>`_. 
+ + +The top-level element of a parsed reStructuredText document is the +"document" element. After initial parsing, the document element is a +simple container for a document fragment, consisting of `body +elements`_, transitions_, and sections_, but lacking a document title +or other bibliographic elements. The code that calls the parser may +choose to run one or more optional post-parse transforms_, +rearranging the document fragment into a complete document with a +title and possibly other metadata elements (author, date, etc.; see +`Bibliographic Fields`_). + +.. _document title: + +Specifically, there is no way to indicate a document title and +subtitle explicitly in reStructuredText. [#]_ Instead, a lone top-level +section title (see Sections_ below) can be treated as the document +title. Similarly, a lone second-level section title immediately after +the "document title" can become the document subtitle. The rest of +the sections are then lifted up a level or two. See the `DocTitle +transform`_ for details. + +.. [#] The `"title" configuration setting`__ and the `"title" + directive`__ set the document's `title attribute`_ that does not + become part of the document body. + + .. _title attribute: ../doctree.html#title-attribute + __ ../../user/config.html#title + __ directives.html#metadata-document-title + + +Sections +-------- + +Doctree elements: section_, title_. + +Sections are identified through their titles, which are marked up with +adornment: "underlines" below the title text, or underlines and +matching "overlines" above the title. An underline/overline is a +single repeated punctuation character that begins in column 1 and +forms a line extending at least as far as the right edge of the title +text. [#]_ Specifically, an underline/overline character may be any +non-alphanumeric printable 7-bit ASCII character [#]_. When an +overline is used, the length and character used must match the +underline. 
Underline-only adornment styles are distinct from +overline-and-underline styles that use the same character. There may +be any number of levels of section titles, although some output +formats may have limits (HTML has 6 levels). + +.. [#] The key is the visual length of the title in a mono-spaced font. + The adornment may need more or fewer characters than the title, if the + title contains wide__ or combining__ characters. + +.. [#] The following are all valid section title adornment + characters:: + + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + + Some characters are more suitable than others. The following are + recommended:: + + = - ` : . ' " ~ ^ _ * + # + +__ https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms#In_Unicode +__ https://en.wikipedia.org/wiki/Combining_character + +Rather than imposing a fixed number and order of section title +adornment styles, the order enforced will be the order as encountered. +The first style encountered will be an outermost title (like HTML H1), +the second style will be a subtitle, the third will be a subsubtitle, +and so on. + +Below are examples of section title styles:: + + =============== + Section Title + =============== + + --------------- + Section Title + --------------- + + Section Title + ============= + + Section Title + ------------- + + Section Title + ````````````` + + Section Title + ''''''''''''' + + Section Title + ............. + + Section Title + ~~~~~~~~~~~~~ + + Section Title + ************* + + Section Title + +++++++++++++ + + Section Title + ^^^^^^^^^^^^^ + +When a title has both an underline and an overline, the title text may +be inset, as in the first two examples above. This is merely +aesthetic and not significant. Underline-only title text may *not* be +inset. + +A blank line after a title is optional. All text blocks up to the +next title of the same or higher level are included in a section (or +subsection, etc.).
+ +All section title styles need not be used, nor need any specific +section title style be used. However, a document must be consistent +in its use of section titles: once a hierarchy of title styles is +established, sections must use that hierarchy. + +Each section title automatically generates a hyperlink target pointing +to the section. The text of the hyperlink target (the "reference +name") is the same as that of the section title. See `Implicit +Hyperlink Targets`_ for a complete description. + +Sections may contain `body elements`_, transitions_, and nested +sections. + + +Transitions +----------- + +Doctree element: transition_. + + Instead of subheads, extra space or a type ornament between + paragraphs may be used to mark text divisions or to signal + changes in subject or emphasis. + + (The Chicago Manual of Style, 14th edition, section 1.80) + +Transitions are commonly seen in novels and short fiction, as a gap +spanning one or more lines, with or without a type ornament such as a +row of asterisks. Transitions separate other body elements. A +transition should not begin or end a section or document, nor should +two transitions be immediately adjacent. + +The syntax for a transition marker is a horizontal line of 4 or more +repeated punctuation characters. The syntax is the same as section +title underlines without title text. Transition markers require blank +lines before and after:: + + Para. + + ---------- + + Para. + +Unlike section title underlines, no hierarchy of transition markers is +enforced, nor do differences in transition markers accomplish +anything. It is recommended that a single consistent style be used. + +The processing system is free to render transitions in output in any +way it likes. For example, horizontal rules (``<hr>``) in HTML output +would be an obvious choice. + + +Body Elements +============= + +Paragraphs +---------- + +Doctree element: paragraph_. 
+ +Paragraphs consist of blocks of left-aligned text with no markup +indicating any other body element. Blank lines separate paragraphs +from each other and from other body elements. Paragraphs may contain +`inline markup`_. + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | | + +------------------------------+ + + +------------------------------+ + | paragraph | + | | + +------------------------------+ + + +Bullet Lists +------------ + +Doctree elements: bullet_list_, list_item_. + +A text block which begins with a "*", "+", "-", "•", "‣", or "⁃", +followed by whitespace, is a bullet list item (a.k.a. "unordered" list +item). List item bodies must be left-aligned and indented relative to +the bullet; the text immediately after the bullet determines the +indentation. For example:: + + - This is the first bullet list item. The blank line above the + first list item is required; blank lines between list items + (such as below this paragraph) are optional. + + - This is the first paragraph in the second item in the list. + + This is the second paragraph in the second item in the list. + The blank line above this paragraph is required. The left edge + of this paragraph lines up with the paragraph above, both + indented relative to the bullet. + + - This is a sublist. The bullet lines up with the left edge of + the text blocks above. A sublist is a new list so requires a + blank line above and below. + + - This is the third item of the main list. + + This paragraph is not part of the list. + +Here are examples of **incorrectly** formatted bullet lists:: + + - This first line is fine. + A blank line is required between list items and paragraphs. + (Warning) + + - The following line appears to be a new sublist, but it is not: + - This is a paragraph continuation, not a sublist (since there's + no blank line). This line is also incorrectly indented. + - Warnings may be issued by the implementation. 
+ +Syntax diagram:: + + +------+-----------------------+ + | "- " | list item | + +------| (body elements)+ | + +-----------------------+ + + +Enumerated Lists +---------------- + +Doctree elements: enumerated_list_, list_item_. + +Enumerated lists (a.k.a. "ordered" lists) are similar to bullet lists, +but use enumerators instead of bullets. An enumerator consists of an +enumeration sequence member and formatting, followed by whitespace. +The following enumeration sequences are recognized: + +- arabic numerals: 1, 2, 3, ... (no upper limit). +- uppercase alphabet characters: A, B, C, ..., Z. +- lower-case alphabet characters: a, b, c, ..., z. +- uppercase Roman numerals: I, II, III, IV, ..., MMMMCMXCIX (4999). +- lowercase Roman numerals: i, ii, iii, iv, ..., mmmmcmxcix (4999). + +In addition, the auto-enumerator, "#", may be used to automatically +enumerate a list. Auto-enumerated lists may begin with explicit +enumeration, which sets the sequence. Fully auto-enumerated lists use +arabic numerals and begin with 1. (Auto-enumerated lists are new in +Docutils 0.3.8.) + +The following formatting types are recognized: + +- suffixed with a period: "1.", "A.", "a.", "I.", "i.". +- surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)". +- suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)". + +While parsing an enumerated list, a new list will be started whenever: + +- An enumerator is encountered which does not have the same format and + sequence type as the current list (e.g. "1.", "(a)" produces two + separate lists). + +- The enumerators are not in sequence (e.g., "1.", "3." produces two + separate lists). + +It is recommended that the enumerator of the first list item be +ordinal-1 ("1", "A", "a", "I", or "i"). Although other start-values +will be recognized, they may not be supported by the output format. A +level-1 [info] system message will be generated for any list beginning +with a non-ordinal-1 enumerator. 
+ +Lists using Roman numerals must begin with "I"/"i" or a +multi-character value, such as "II" or "XV". Any other +single-character Roman numeral ("V", "X", "L", "C", "D", "M") will be +interpreted as a letter of the alphabet, not as a Roman numeral. +Likewise, lists using letters of the alphabet may not begin with +"I"/"i", since these are recognized as Roman numeral 1. + +The second line of each enumerated list item is checked for validity. +This is to prevent ordinary paragraphs from being mistakenly +interpreted as list items, when they happen to begin with text +identical to enumerators. For example, this text is parsed as an +ordinary paragraph:: + + A. Einstein was a really + smart dude. + +However, ambiguity cannot be avoided if the paragraph consists of only +one line. This text is parsed as an enumerated list item:: + + A. Einstein was a really smart dude. + +If a single-line paragraph begins with text identical to an enumerator +("A.", "1.", "(b)", "I)", etc.), the first character will have to be +escaped in order to have the line parsed as an ordinary paragraph:: + + \A. Einstein was a really smart dude. + +Examples of nested enumerated lists:: + + 1. Item 1 initial text. + + a) Item 1a. + b) Item 1b. + + 2. a) Item 2a. + b) Item 2b. + +Example syntax diagram:: + + +-------+----------------------+ + | "1. " | list item | + +-------| (body elements)+ | + +----------------------+ + + +Definition Lists +---------------- + +Doctree elements: definition_list_, definition_list_item_, term_, +classifier_, definition_. + +Each definition list item contains a term, optional classifiers, and a +definition. + +* A `term` is a simple one-line word or phrase. Escape_ a leading hyphen + to prevent recognition as an `option list`_ item. + +* Optional `classifiers` may follow the term on the same line, each after + an inline " : " (space, colon, space). Inline markup is parsed in the + term line before the classifier delimiters are recognized. 
A delimiter + will only be recognized if it appears outside of any inline markup. + +* A `definition` is a block indented relative to the term, and may + contain multiple paragraphs and other body elements. There may be no + blank line between a term line and a definition block (this + distinguishes definition lists from `block quotes`_). Blank lines are + required before the first and after the last definition list item, but + are optional in-between. + +Example:: + + term 1 + Definition 1. + + term 2 + Definition 2, paragraph 1. + + Definition 2, paragraph 2. + + term 3 : classifier + Definition 3. + + term 4 : classifier one : classifier two + Definition 4. + + \-term 5 + Without escaping, this would be an option list item. + +A definition list may be used in various ways, including: + +- As a dictionary or glossary. The term is the word itself, a + classifier may be used to indicate the usage of the term (noun, + verb, etc.), and the definition follows. + +- To describe program variables. The term is the variable name, a + classifier may be used to indicate the type of the variable (string, + integer, etc.), and the definition describes the variable's use in + the program. This usage of definition lists supports the classifier + syntax of Grouch_, a system for describing and enforcing a Python + object schema. + +Syntax diagram:: + + +----------------------------+ + | term [ " : " classifier ]* | + +--+-------------------------+--+ + | definition | + | (body elements)+ | + +----------------------------+ + + +Field Lists +----------- + +Doctree elements: field_list_, field_, field_name_, field_body_. + +Field lists are used as part of an extension syntax, such as options +for directives_, or database-like records meant for further +processing. They may also be used for two-column table-like +structures resembling database records (label & data pairs). 
+Applications of reStructuredText may recognize field names and +transform fields or field bodies in certain contexts. For examples, +see `Bibliographic Fields`_ below, or the "image_" and "meta_" +directives in `reStructuredText Directives`_. + +.. _field names: + +Field lists are mappings from *field names* to *field bodies*, modeled on +RFC822_ headers. A field name may consist of any characters, but +colons (":") inside of field names must be backslash-escaped +when followed by whitespace.\ [#]_ +Inline markup is parsed in field names, but care must be taken when +using `interpreted text`_ with explicit roles in field names: the role +must be a suffix to the interpreted text. Field names are +case-insensitive when further processed or transformed. The field +name, along with a single colon prefix and suffix, together form the +field marker. The field marker is followed by whitespace and the +field body. The field body may contain multiple body elements, +indented relative to the field marker. The first line after the field +name marker determines the indentation of the field body. For +example:: + + :Date: 2001-08-16 + :Version: 1 + :Authors: - Me + - Myself + - I + :Indentation: Since the field marker may be quite long, the second + and subsequent lines of the field body do not have to line up + with the first line, but they must be indented relative to the + field name marker, and they must line up with each other. + :Parameter i: integer + +The interpretation of individual words in a multi-word field name is +up to the application. The application may specify a syntax for the +field name. For example, second and subsequent words may be treated +as "arguments", quoted phrases may be treated as a single argument, +and direct support for the "name=value" syntax may be added. + +Standard RFC822_ headers cannot be used for this construct because +they are ambiguous. A word followed by a colon at the beginning of a +line is common in written text. 
However, in well-defined contexts +such as when a field list invariably occurs at the beginning of a +document (PEPs and email messages), standard RFC822 headers could be +used. + +Syntax diagram (simplified):: + + +--------------------+----------------------+ + | ":" field name ":" | field body | + +-------+------------+ | + | (body elements)+ | + +-----------------------------------+ + +.. [#] Up to Docutils 0.14, field markers were not recognized when + containing a colon. + +Bibliographic Fields +```````````````````` + +Doctree elements: docinfo_, address_, author_, authors_, contact_, +copyright_, date_, organization_, revision_, status_, topic_, +version_. + +When a field list is the first element in a document +(after the document title, if there is one) [#]_, it may have its fields +transformed to document bibliographic data. This bibliographic data +corresponds to the front matter of a book, such as the title page and +copyright page. + +.. [#] In addition to the document title and subtitle, also comments_, + `substitution definitions`_, `hyperlink targets`_, and "header", + "footer", "meta", and "raw" directives_ may be placed before the + bibliographic fields. + +Certain registered field names (listed below) are recognized and +transformed to the corresponding doctree elements, most becoming child +elements of the docinfo_ element. No ordering is required of these +fields, although they may be rearranged to fit the document structure, +as noted. Unless otherwise indicated below, each of the bibliographic +elements' field bodies may contain a single paragraph only. Field +bodies may be checked for `RCS keywords`_ and cleaned up. Any +unrecognized fields will remain as generic fields in the docinfo +element. 
+ +The registered bibliographic field names and their corresponding +doctree elements are as follows: + + ============= ================ + Field name doctree element + ============= ================ + Abstract topic_ + Address address_ + Author author_ + Authors authors_ + Contact contact_ + Copyright copyright_ + Date date_ + Dedication topic_ + Organization organization_ + Revision revision_ + Status status_ + Version version_ + ============= ================ + +The "Authors" field may contain either: a single paragraph consisting +of a list of authors, separated by ";" or "," (";" is checked first, +so "Doe, Jane; Doe, John" will work.); multiple paragraphs (one per +author); or a bullet list whose elements each contain a single +paragraph per author. In some languages +(e.g. Swedish), there is no singular/plural distinction between +"Author" and "Authors", so only an "Authors" field is provided, and a +single name is interpreted as an "Author". If a single name contains +a comma, end it with a semicolon to disambiguate: ":Authors: Doe, +Jane;". + +The "Address" field is for a multi-line surface mailing address. +Newlines and whitespace will be preserved. + +The "Dedication" and "Abstract" fields may contain arbitrary body +elements. Only one of each is allowed. They become topic elements +with "Dedication" or "Abstract" titles (or language equivalents) +immediately following the docinfo element. + +This field-name-to-element mapping can be replaced for other +languages. See the `DocInfo transform`_ implementation documentation +for details. + +Unregistered/generic fields may contain one or more paragraphs or +arbitrary body elements. +The field name is also used as a `"classes" attribute`_ value after being +converted into a valid identifier form. + + +RCS Keywords +```````````` + +`Bibliographic fields`_ recognized by the parser are normally checked +for RCS [#]_ keywords and cleaned up [#]_. 
RCS keywords may be +entered into source files as "$keyword$", and once stored under RCS, +CVS [#]_, or SVN [#]_, they are expanded to "$keyword: expansion text $". +For example, a "Status" field will be transformed to a "status" element:: + + :Status: $keyword: expansion text $ + +.. [#] Revision Control System. +.. [#] RCS keyword processing can be turned off (unimplemented). +.. [#] Concurrent Versions System. CVS uses the same keywords as RCS. +.. [#] Subversion version control system. Uses the same keywords as RCS. + +Processed, the "status" element's text will become simply "expansion +text". The dollar sign delimiters and leading RCS keyword name are +removed. + +The RCS keyword processing only kicks in when the field list is in +bibliographic context (first non-comment construct in the document, +after a document title if there is one). + +.. _option list: + +Option Lists +------------ + +Doctree elements: option_list_, option_list_item_, option_group_, option_, +option_string_, option_argument_, description_. + +Option lists map a program's command-line options to descriptions +documenting them. For example:: + + -a Output all. + -b Output both (this description is + quite long). + -c arg Output just arg. + --long Output all day long. + + -p This option has two paragraphs in the description. + This is the first. + + This is the second. Blank lines may be omitted between + options (as above) or left in (as here and below). + + --very-long-option A VMS-style option. Note the adjustment for + the required two spaces. + + --an-even-longer-option + The description can also start on the next line. + + -2, --two This option has two variants. + + -f FILE, --file=FILE These two options are synonyms; both have + arguments. + + /V A VMS/DOS-style option. + +There are several types of options recognized by reStructuredText: + +- Short POSIX options consist of one dash and an option letter.
+- Long POSIX options consist of two dashes and an option word; some + systems use a single dash. +- Old GNU-style "plus" options consist of one plus and an option + letter ("plus" options are deprecated now, their use discouraged). +- DOS/VMS options consist of a slash and an option letter or word. + +Please note that both POSIX-style and DOS/VMS-style options may be +used by DOS or Windows software. These and other variations are +sometimes used mixed together. The names above have been chosen for +convenience only. + +The syntax for short and long POSIX options is based on the syntax +supported by Python's getopt.py_ module, which implements an option +parser similar to the `GNU libc getopt_long()`_ function but with some +restrictions. There are many variant option systems, and +reStructuredText option lists do not support all of them. + +Although long POSIX and DOS/VMS option words may be allowed to be +truncated by the operating system or the application when used on the +command line, reStructuredText option lists do not show or support +this with any special syntax. The complete option word should be +given, supported by notes about truncation if and when applicable. + +Options may be followed by an argument placeholder, whose role and +syntax should be explained in the description text. +Either a space or an equals sign may be used as a delimiter between long +options and option argument placeholders; +short options ("-" or "+" prefix only) use a space or omit the delimiter. +Option arguments may take one of two forms: + +- Begins with a letter (``[a-zA-Z]``) and subsequently consists of + letters, numbers, underscores and hyphens (``[a-zA-Z0-9_-]``). +- Begins with an open-angle-bracket (``<``) and ends with a + close-angle-bracket (``>``); any characters except angle brackets + are allowed internally. + +Multiple option "synonyms" may be listed, sharing a single +description. They must be separated by comma-space. 
+ +There must be at least two spaces between the option(s) and the +description (which can also start on the next line). The description +may contain multiple body elements. +The first line after the option marker determines the indentation of the +description. As with other types of lists, blank lines are required +before the first option list item and after the last, but are optional +between option entries. + +Syntax diagram (simplified):: + + +----------------------------+-------------+ + | option [" " argument] " " | description | + +-------+--------------------+ | + | (body elements)+ | + +----------------------------------+ + + +Literal Blocks +-------------- + +Doctree element: literal_block_. + +A paragraph consisting of two colons ("::") signifies that the +following text block(s) comprise a literal block. The literal block +must either be indented or quoted (see below). No markup processing +is done within a literal block. It is left as-is, and is typically +rendered in a monospaced typeface:: + + This is a typical paragraph. An indented literal block follows. + + :: + + for a in [5,4,3,2,1]: # this is program code, shown as-is + print a + print "it's..." + # a literal block continues until the indentation ends + + This text has returned to the indentation of the first paragraph, + is outside of the literal block, and is therefore treated as an + ordinary paragraph. + +The paragraph containing only "::" will be completely removed from the +output; no empty paragraph will remain. + +As a convenience, the "::" is recognized at the end of any paragraph. +If immediately preceded by whitespace, both colons will be removed +from the output (this is the "partially minimized" form). When text +immediately precedes the "::", *one* colon will be removed from the +output, leaving only one colon visible (i.e., "::" will be replaced by +":"; this is the "fully minimized" form). 
+ +In other words, these are all equivalent (please pay attention to the +colons after "Paragraph"): + +1. Expanded form:: + + Paragraph: + + :: + + Literal block + +2. Partially minimized form:: + + Paragraph: :: + + Literal block + +3. Fully minimized form:: + + Paragraph:: + + Literal block + +All whitespace (including line breaks, but excluding minimum +indentation for indented literal blocks) is preserved. Blank lines +are required before and after a literal block, but these blank lines +are not included as part of the literal block. + + +Indented Literal Blocks +``````````````````````` + +Indented literal blocks are indicated by indentation relative to the +surrounding text (leading whitespace on each line). The minimum +indentation will be removed from each line of an indented literal +block. The literal block need not be contiguous; blank lines are +allowed between sections of indented text. The literal block ends +with the end of the indentation. + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | (ends with "::") | + +------------------------------+ + +---------------------------+ + | indented literal block | + +---------------------------+ + + +Quoted Literal Blocks +````````````````````` + +Quoted literal blocks are unindented contiguous blocks of text where +each line begins with the same non-alphanumeric printable 7-bit ASCII +character [#]_. A blank line ends a quoted literal block. The +quoting characters are preserved in the processed document. + +.. [#] + The following are all valid quoting characters:: + + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + + Note that these are the same characters as are valid for title + adornment of sections_. + +Possible uses include literate programming in Haskell and email +quoting:: + + John Doe wrote:: + + >> Great idea! + > + > Why didn't I think of that? + + You just did! 
;-) + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | (ends with "::") | + +------------------------------+ + +------------------------------+ + | ">" per-line-quoted | + | ">" contiguous literal block | + +------------------------------+ + + +Line Blocks +----------- + +Doctree elements: line_block_, line_. (New in Docutils 0.3.5.) + +Line blocks are useful for address blocks, verse (poetry, song +lyrics), and unadorned lists, where the structure of lines is +significant. Line blocks are groups of lines beginning with vertical +bar ("|") prefixes. Each vertical bar prefix indicates a new line, so +line breaks are preserved. Initial indents are also significant, +resulting in a nested structure. Inline markup is supported. +Continuation lines are wrapped portions of long lines; they begin with +a space in place of the vertical bar. The left edge of a continuation +line must be indented, but need not be aligned with the left edge of +the text above it. A line block ends with a blank line. + +This example illustrates continuation lines:: + + | Lend us a couple of bob till Thursday. + | I'm absolutely skint. + | But I'm expecting a postal order and I can pay you back + as soon as it comes. + | Love, Ewan. + +This example illustrates the nesting of line blocks, indicated by the +initial indentation of new lines:: + + Take it away, Eric the Orchestra Leader! + + | A one, two, a one two three four + | + | Half a bee, philosophically, + | must, *ipso facto*, half not be. + | But half the bee has got to be, + | *vis a vis* its entity. D'you see? + | + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + | + | Singing... + +Syntax diagram:: + + +------+-----------------------+ + | "| " | line | + +------| continuation line | + +-----------------------+ + + +Block Quotes +------------ + +Doctree elements: block_quote_, attribution_. 
+ +A text block that is indented relative to the preceding text, without +preceding markup indicating it to be a literal block or other content, +is a block quote. All markup processing (for body elements and inline +markup) continues within the block quote:: + + This is an ordinary paragraph, introducing a block quote. + + "It is my business to know things. That is my trade." + + -- Sherlock Holmes + +A block quote may end with an attribution: a text block beginning with +``--``, ``---``, or a true em-dash, flush left within the block quote. If +the attribution consists of multiple lines, the left edges of the +second and subsequent lines must align. + +Multiple block quotes may occur consecutively if terminated with +attributions. + + Unindented paragraph. + + Block quote 1. + + -- Attribution 1 + + Block quote 2. + +`Empty comments`_ may be used to explicitly terminate preceding +constructs that would otherwise consume a block quote:: + + * List item. + + .. + + Block quote 3. + +Empty comments may also be used to separate block quotes:: + + Block quote 4. + + .. + + Block quote 5. + +Blank lines are required before and after a block quote, but these +blank lines are not included as part of the block quote. + +Syntax diagram:: + + +------------------------------+ + | (current level of | + | indentation) | + +------------------------------+ + +---------------------------+ + | block quote | + | (body elements)+ | + | | + | -- attribution text | + | (optional) | + +---------------------------+ + + +Doctest Blocks +-------------- + +Doctree element: doctest_block_. + +Doctest blocks are interactive Python sessions cut-and-pasted into +docstrings. They are meant to illustrate usage by example, and +provide an elegant and powerful testing environment via the `doctest +module`_ in the Python standard library. + +Doctest blocks are text blocks which begin with ``">>> "``, the Python +interactive interpreter main prompt, and end with a blank line. 
+Doctest blocks are treated as a special case of literal blocks, +without requiring the literal block syntax. If both are present, the +literal block syntax takes priority over Doctest block syntax:: + + This is an ordinary paragraph. + + >>> print 'this is a Doctest block' + this is a Doctest block + + The following is a literal block:: + + >>> This is not recognized as a doctest block by + reStructuredText. It *will* be recognized by the doctest + module, though! + +Indentation is not required for doctest blocks. + + +Tables +------ + +Doctree elements: table_, tgroup_, colspec_, thead_, tbody_, row_, entry_. + +ReStructuredText provides two syntax variants for delineating table +cells: `Grid Tables`_ and `Simple Tables`_. Tables are also generated by +the `CSV Table`_ and `List Table`_ directives. The `table +directive`_ is used to add a table title or specify options. + +As with other body elements, blank lines are required before and after +tables. Tables' left edges should align with the left edge of +preceding text blocks; if indented, the table is considered to be part +of a block quote. + +Once isolated, each table cell is treated as a miniature document; the +top and bottom cell boundaries act as delimiting blank lines. Each +cell contains zero or more body elements. Cell contents may include +left and/or right margins, which are removed before processing. + + +Grid Tables +``````````` + +Grid tables provide a complete table representation via grid-like +"ASCII art". Grid tables allow arbitrary cell contents (body +elements), and both row and column spans. However, grid tables can be +cumbersome to produce, especially for simple data sets. The `Emacs +table mode`_ is a tool that allows easy editing of grid tables, in +Emacs. See `Simple Tables`_ for a simpler (but limited) +representation. + +Grid tables are described with a visual grid made up of the characters +"-", "=", "|", and "+". The hyphen ("-") is used for horizontal lines +(row separators). 
The equals sign ("=") may be used to separate +optional header rows from the table body (not supported by the `Emacs +table mode`_). The vertical bar ("|") is used for vertical lines +(column separators). The plus sign ("+") is used for intersections of +horizontal and vertical lines. Example:: + + +------------------------+------------+----------+----------+ + | Header row, column 1 | Header 2 | Header 3 | Header 4 | + | (header rows optional) | | | | + +========================+============+==========+==========+ + | body row 1, column 1 | column 2 | column 3 | column 4 | + +------------------------+------------+----------+----------+ + | body row 2 | Cells may span columns. | + +------------------------+------------+---------------------+ + | body row 3 | Cells may | - Table cells | + +------------------------+ span rows. | - contain | + | body row 4 | | - body elements. | + +------------------------+------------+---------------------+ + +Some care must be taken with grid tables to avoid undesired +interactions with cell text in rare cases. For example, the following +table contains a cell in row 2 spanning from column 2 to column 4:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +If a vertical bar is used in the text of that cell, it could have +unintended effects if accidentally aligned with column boundaries:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +Several solutions are possible. 
All that is needed is to break the +continuity of the cell outline rectangle. One possibility is to shift +the text by adding an extra space before:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +Another possibility is to add an extra line to row 2:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + | | | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + + +Simple Tables +````````````` + +Simple tables provide a compact and easy to type but limited +row-oriented table representation for simple data sets. Cell contents +are typically single paragraphs, although arbitrary body elements may +be represented in most cells. Simple tables allow multi-line rows (in +all but the first column) and column spans, but not row spans. See +`Grid Tables`_ above for a complete table representation. + +Simple tables are described with horizontal borders made up of "=" and +"-" characters. The equals sign ("=") is used for top and bottom +table borders, and to separate optional header rows from the table +body. The hyphen ("-") is used to indicate column spans in a single +row by underlining the joined columns, and may optionally be used to +explicitly and/or visually separate rows. + +A simple table begins with a top border of equals signs with one or +more spaces at each column boundary (two or more spaces recommended). +Regardless of spans, the top border *must* fully describe all table +columns. 
There must be at least two columns in the table (to +differentiate it from section headers). The top border may be +followed by header rows, and the last of the optional header rows is +underlined with '=', again with spaces at column boundaries. There +may not be a blank line below the header row separator; it would be +interpreted as the bottom border of the table. The bottom boundary of +the table consists of '=' underlines, also with spaces at column +boundaries. For example, here is a truth table, a three-column table +with one header row and four body rows:: + + ===== ===== ======= + A B A and B + ===== ===== ======= + False False False + True False False + False True False + True True True + ===== ===== ======= + +Underlines of '-' may be used to indicate column spans by "filling in" +column margins to join adjacent columns. Column span underlines must +be complete (they must cover all columns) and align with established +column boundaries. Text lines containing column span underlines may +not contain any other text. A column span underline applies only to +one row immediately above it. For example, here is a table with a +column span in the header:: + + ===== ===== ====== + Inputs Output + ------------ ------ + A B A or B + ===== ===== ====== + False False False + True False True + False True True + True True True + ===== ===== ====== + +Each line of text must contain spaces at column boundaries, except +where cells have been joined by column spans. Each line of text +starts a new row, except when there is a blank cell in the first +column. In that case, that line of text is parsed as a continuation +line. For this reason, cells in the first column of new rows (*not* +continuation lines) *must* contain some text; blank cells would lead +to a misinterpretation (but see the tip below). Also, this mechanism +limits cells in the first column to only one line of text. Use `grid +tables`_ if this limitation is unacceptable. + +.. 
Tip:: + + To start a new row in a simple table without text in the first + column in the processed output, use one of these: + + * an empty comment (".."), which may be omitted from the processed + output (see Comments_ below) + + * a backslash escape ("``\``") followed by a space (see `Escaping + Mechanism`_ above) + +Underlines of '-' may also be used to visually separate rows, even if +there are no column spans. This is especially useful in long tables, +where rows are many lines long. + +Blank lines are permitted within simple tables. Their interpretation +depends on the context. Blank lines *between* rows are ignored. +Blank lines *within* multi-line rows may separate paragraphs or other +body elements within cells. + +The rightmost column is unbounded; text may continue past the edge of +the table (as indicated by the table borders). However, it is +recommended that borders be made long enough to contain the entire +text. + +The following example illustrates continuation lines (row 2 consists +of two lines of text, and four lines for row 3), a blank line +separating paragraphs (row 3, column 2), text extending past the right +edge of the table, and a new row which will have no text in the first +column in the processed output (row 4):: + + ===== ===== + col 1 col 2 + ===== ===== + 1 Second column of row 1. + 2 Second column of row 2. + Second line of paragraph. + 3 - Second column of row 3. + + - Second item in bullet + list (row 3, column 2). + \ Row 4; column 1 will be empty. + ===== ===== + + +Explicit Markup Blocks +---------------------- + +The explicit markup syntax is used for footnotes_, citations_, +`hyperlink targets`_, directives_, `substitution definitions`_, +and comments_. + +An explicit markup block is a text block: + +- whose first line begins with ".." followed by whitespace (the + "explicit markup start"), +- whose second and subsequent lines (if any) are indented relative to + the first, and +- which ends before an unindented line. 
+ +Explicit markup blocks are analogous to field list items. The +maximum common indentation is always removed from the second and +subsequent lines of the block body. Therefore, if the first construct +fits in one line and the indentation of the first and second +constructs should differ, the first construct should not begin on the +same line as the explicit markup start. + +Blank lines are required between explicit markup blocks and other +elements, but are optional between explicit markup blocks where +unambiguous. + + +Footnotes +````````` + +See also: `Footnote References`_. + +Doctree elements: footnote_, label_. + +Configuration settings: +`footnote_references <footnote_references setting_>`_. + +.. _footnote_references setting: + ../../user/config.html#footnote-references + +Each footnote consists of an explicit markup start (".. "), a left +square bracket, the footnote label, a right square bracket, and +whitespace, followed by indented body elements. A footnote label can +be: + +- a whole decimal number consisting of one or more digits, + +- a single "#" (denoting `auto-numbered footnotes`_), + +- a "#" followed by a simple reference name (an `autonumber label`_), + or + +- a single "*" (denoting `auto-symbol footnotes`_). + +The footnote content (body elements) must be consistently indented +and left-aligned. The first body element within a +footnote may often begin on the same line as the footnote label. +However, if the first element fits on one line and the indentation of +the remaining elements differs, the first element must begin on the +line after the footnote label. Otherwise, the difference in +indentation will not be detected. + +Footnotes may occur anywhere in the document, not only at the end. +Where and how they appear in the processed output depends on the +processing system. + +Here is a manually numbered footnote:: + + .. [1] Body elements go here. + +Each footnote automatically generates a hyperlink target pointing to +itself.
The text of the hyperlink target name is the same as that of +the footnote label. `Auto-numbered footnotes`_ generate a number as +their footnote label and reference name. See `Implicit Hyperlink +Targets`_ for a complete description of the mechanism. + +Syntax diagram:: + + +-------+-------------------------+ + | ".. " | "[" label "]" footnote | + +-------+ | + | (body elements)+ | + +-------------------------+ + + +Auto-Numbered Footnotes +....................... + +A number sign ("#") may be used as the first character of a footnote +label to request automatic numbering of the footnote or footnote +reference. + +The first footnote to request automatic numbering is assigned the +label "1", the second is assigned the label "2", and so on (assuming +there are no manually numbered footnotes present; see `Mixed Manual +and Auto-Numbered Footnotes`_ below). A footnote which has +automatically received a label "1" generates an implicit hyperlink +target with name "1", just as if the label was explicitly specified. + +.. _autonumber label: `autonumber labels`_ + +A footnote may specify a label explicitly while at the same time +requesting automatic numbering: ``[#label]``. These labels are called +_`autonumber labels`. Autonumber labels do two things: + +- On the footnote itself, they generate a hyperlink target whose name + is the autonumber label (doesn't include the "#"). + +- They allow an automatically numbered footnote to be referred to more + than once, as a footnote reference or hyperlink reference. For + example:: + + If [#note]_ is the first footnote reference, it will show up as + "[1]". We can refer to it again as [#note]_ and again see + "[1]". We can also refer to it as note_ (an ordinary internal + hyperlink reference). + + .. [#note] This is the footnote labeled "note". + +The numbering is determined by the order of the footnotes, not by the +order of the references. 
For footnote references without autonumber +labels (``[#]_``), the footnotes and footnote references must be in +the same relative order but need not alternate in lock-step. For +example:: + + [#]_ is a reference to footnote 1, and [#]_ is a reference to + footnote 2. + + .. [#] This is footnote 1. + .. [#] This is footnote 2. + .. [#] This is footnote 3. + + [#]_ is a reference to footnote 3. + +Special care must be taken if footnotes themselves contain +auto-numbered footnote references, or if multiple references are made +in close proximity. Footnotes and references are noted in the order +they are encountered in the document, which is not necessarily the +same as the order in which a person would read them. + + +Auto-Symbol Footnotes +..................... + +An asterisk ("*") may be used for footnote labels to request automatic +symbol generation for footnotes and footnote references. The asterisk +may be the only character in the label. For example:: + + Here is a symbolic footnote reference: [*]_. + + .. [*] This is the footnote. + +A transform will insert symbols as labels into corresponding footnotes +and footnote references. The number of references must be equal to +the number of footnotes. One symbol footnote cannot have multiple +references. + +The standard Docutils system uses the following symbols for footnote +marks [#]_: + +- asterisk/star ("*") +- dagger (HTML character entity "&dagger;", Unicode U+02020) +- double dagger ("&Dagger;"/U+02021) +- section mark ("&sect;"/U+000A7) +- pilcrow or paragraph mark ("&para;"/U+000B6) +- number sign ("#") +- spade suit ("&spades;"/U+02660) +- heart suit ("&hearts;"/U+02665) +- diamond suit ("&diams;"/U+02666) +- club suit ("&clubs;"/U+02663) + +.. [#] This list was inspired by the list of symbols for "Note + Reference Marks" in The Chicago Manual of Style, 14th edition, + section 12.51. "Parallels" ("||") were given in CMoS instead of + the pilcrow. The last four symbols (the card suits) were added + arbitrarily.
+ +If more than ten symbols are required, the same sequence will be +reused, doubled and then tripled, and so on ("**" etc.). + +.. Note:: When using auto-symbol footnotes, the choice of output + encoding is important. Many of the symbols used are not encodable + in certain common text encodings such as Latin-1 (ISO 8859-1). The + use of UTF-8 for the output encoding is recommended. An + alternative for HTML and XML output is to use the + "xmlcharrefreplace" `output encoding error handler`__. + +__ ../../user/config.html#output-encoding-error-handler + + +Mixed Manual and Auto-Numbered Footnotes +........................................ + +Manual and automatic footnote numbering may both be used within a +single document, although the results may not be expected. Manual +numbering takes priority. Only unused footnote numbers are assigned +to auto-numbered footnotes. The following example should be +illustrative:: + + [2]_ will be "2" (manually numbered), + [#]_ will be "3" (anonymous auto-numbered), and + [#label]_ will be "1" (labeled auto-numbered). + + .. [2] This footnote is labeled manually, so its number is fixed. + + .. [#label] This autonumber-labeled footnote will be labeled "1". + It is the first auto-numbered footnote and no other footnote + with label "1" exists. The order of the footnotes is used to + determine numbering, not the order of the footnote references. + + .. [#] This footnote will be labeled "3". It is the second + auto-numbered footnote, but footnote label "2" is already used. + + +Citations +````````` + +See also: `Citation References`_. + +Doctree element: citation_ + +Citations are identical to footnotes except that they use only +non-numeric labels such as ``[note]`` or ``[GVR2001]``. Citation +labels are simple `reference names`_ (case-insensitive single words +consisting of alphanumerics plus internal hyphens, underscores, and +periods; no whitespace). Citations may be rendered separately and +differently from footnotes. 
For example:: + + Here is a citation reference: [CIT2002]_. + + .. [CIT2002] This is the citation. It's just like a footnote, + except the label is textual. + + +.. _hyperlinks: + +Hyperlink Targets +````````````````` + +Doctree element: target_. + +These are also called _`explicit hyperlink targets`, to differentiate +them from `implicit hyperlink targets`_ defined below. + +Hyperlink targets identify a location within or outside of a document, +which may be linked to by `hyperlink references`_. + +Hyperlink targets may be named or anonymous. Named hyperlink targets +consist of an explicit markup start (".. "), an underscore, the +reference name (no trailing underscore), a colon, whitespace, and a +link block:: + + .. _hyperlink-name: link-block + +Reference names are whitespace-neutral and case-insensitive. See +`Reference Names`_ for details and examples. + +Anonymous hyperlink targets consist of an explicit markup start +(".. "), two underscores, a colon, whitespace, and a link block; there +is no reference name:: + + .. __: anonymous-hyperlink-target-link-block + +An alternate syntax for anonymous hyperlinks consists of two +underscores, a space, and a link block:: + + __ anonymous-hyperlink-target-link-block + +See `Anonymous Hyperlinks`_ below. + +There are three types of hyperlink targets: internal, external, and +indirect. + +1. _`Internal hyperlink targets` have empty link blocks. They provide + an end point allowing a hyperlink to connect one place to another + within a document. An internal hyperlink target points to the + element following the target. [#]_ For example:: + + Clicking on this internal hyperlink will take us to the target_ + below. + + .. _target: + + The hyperlink target above points to this paragraph. + + Internal hyperlink targets may be "chained". Multiple adjacent + internal hyperlink targets all point to the same element:: + + .. _target1: + .. 
_target2: + + The targets "target1" and "target2" are synonyms; they both + point to this paragraph. + + If the element "pointed to" is an external hyperlink target (with a + URI in its link block; see #2 below) the URI from the external + hyperlink target is propagated to the internal hyperlink targets; + they will all "point to" the same URI. There is no need to + duplicate a URI. For example, all three of the following hyperlink + targets refer to the same URI:: + + .. _Python DOC-SIG mailing list archive: + .. _archive: + .. _Doc-SIG: https://mail.python.org/pipermail/doc-sig/ + + An inline form of internal hyperlink target is available; see + `Inline Internal Targets`_. + + .. [#] Works also, if the internal hyperlink target is "nested" at the + end of an indented text block. This behaviour allows setting targets + to individual list items (except the first, as a preceding internal + target applies to the list as a whole):: + + * bullet list + + .. _`second item`: + + * second item, with hyperlink target. + + +2. _`External hyperlink targets` have an absolute or relative URI or + email address in their link blocks. For example, take the + following input:: + + See the Python_ home page for info. + + `Write to me`_ with your questions. + + .. _Python: https://www.python.org + .. _Write to me: jdoe@example.com + + After processing into HTML, the hyperlinks might be expressed as:: + + See the <a href="https://www.python.org">Python</a> home page + for info. + + <a href="mailto:jdoe@example.com">Write to me</a> with your + questions. + + An external hyperlink's URI may begin on the same line as the + explicit markup start and target name, or it may begin in an + indented text block immediately following, with no intervening + blank lines. If there are multiple lines in the link block, they + are concatenated. Any unescaped whitespace is removed (whitespace is + permitted to allow for line wrapping). 
The following external + hyperlink targets are equivalent:: + + .. _one-liner: https://docutils.sourceforge.io/rst.html + + .. _starts-on-this-line: https:// + docutils.sourceforge.io/rst.html + + .. _entirely-below: + https://docutils. + sourceforge.io/rst.html + + Escaped whitespace is preserved as intentional spaces, e.g.:: + + .. _reference: ../local\ path\ with\ spaces.html + + If an external hyperlink target's URI contains an underscore as its + last character, it must be escaped to avoid being mistaken for an + indirect hyperlink target:: + + This link_ refers to a file called ``underscore_``. + + .. _link: underscore\_ + + It is possible (although not generally recommended) to include URIs + directly within hyperlink references. See `Embedded URIs and Aliases`_ + below. + +3. _`Indirect hyperlink targets` have a hyperlink reference in their + link blocks. In the following example, target "one" indirectly + references whatever target "two" references, and target "two" + references target "three", an internal hyperlink target. In + effect, all three reference the same thing:: + + .. _one: two_ + .. _two: three_ + .. _three: + + Just as with `hyperlink references`_ anywhere else in a document, + if a phrase-reference is used in the link block it must be enclosed + in backquotes. As with `external hyperlink targets`_, the link + block of an indirect hyperlink target may begin on the same line as + the explicit markup start or the next line. It may also be split + over multiple lines, in which case the lines are joined with + whitespace before being normalized. + + For example, the following indirect hyperlink targets are + equivalent:: + + .. _one-liner: `A HYPERLINK`_ + .. _entirely-below: + `a hyperlink`_ + .. _split: `A + Hyperlink`_ + + It is possible to include an alias directly within hyperlink + references. See `Embedded URIs and Aliases`_ below.
+ +If the reference name contains any colons, either: + +- the phrase must be enclosed in backquotes:: + + .. _`FAQTS: Computers: Programming: Languages: Python`: + http://python.faqts.com/ + +- or the colon(s) must be backslash-escaped in the link target:: + + .. _Chapter One\: "Tadpole Days": + + It's not easy being green... + +See `Implicit Hyperlink Targets`_ below for the resolution of +duplicate reference names. + +Syntax diagram:: + + +-------+----------------------+ + | ".. " | "_" name ":" link | + +-------+ block | + | | + +----------------------+ + + +Anonymous Hyperlinks +.................... + +The `World Wide Web Consortium`_ recommends in its `HTML Techniques +for Web Content Accessibility Guidelines`_ that authors should +"clearly identify the target of each link." Hyperlink references +should be as verbose as possible, but duplicating a verbose hyperlink +name in the target is onerous and error-prone. Anonymous hyperlinks +are designed to allow convenient verbose hyperlink references, and are +analogous to `Auto-Numbered Footnotes`_. They are particularly useful +in short or one-off documents. However, this feature is easily abused +and can result in unreadable plaintext and/or unmaintainable +documents. Caution is advised. + +Anonymous `hyperlink references`_ are specified with two underscores +instead of one:: + + See `the web site of my favorite programming language`__. + +Anonymous targets begin with ".. __:"; no reference name is required +or allowed:: + + .. __: https://www.python.org + +As a convenient alternative, anonymous targets may begin with "__" +only:: + + __ https://www.python.org + +The reference name of the reference is not used to match the reference +to its target. Instead, the order of anonymous hyperlink references +and targets within the document is significant: the first anonymous +reference will link to the first anonymous target. 
The number of +anonymous hyperlink references in a document must match the number of +anonymous targets. For readability, it is recommended that targets be +kept close to references. Take care when editing text containing +anonymous references; adding, removing, and rearranging references +require attention to the order of corresponding targets. + + +Directives +`````````` + +Doctree elements: depend on the directive. + +Directives are an extension mechanism for reStructuredText, a way of +adding support for new constructs without adding new primary syntax +(directives may support additional syntax locally). All standard +directives (those implemented and registered in the reference +reStructuredText parser) are described in the `reStructuredText +Directives`_ document, and are always available. Any other directives +are domain-specific, and may require special action to make them +available when processing the document. + +For example, here's how an image_ may be placed:: + + .. image:: mylogo.jpeg + +A figure_ (a graphic with a caption) may be placed like this:: + + .. figure:: larch.png + + The larch. + +An admonition_ (note, caution, etc.) contains other body elements:: + + .. note:: This is a paragraph + + - Here is a bullet list. + +Directives are indicated by an explicit markup start (".. ") followed +by the directive type, two colons, and whitespace (together called the +"directive marker"). Directive types are case-insensitive single +words (alphanumerics plus isolated internal hyphens, underscores, +plus signs, colons, and periods; no whitespace). Two colons are used +after the directive type for these reasons: + +- Two colons are distinctive, and unlikely to be used in common text. + +- Two colons avoid clashes with common comment text like:: + + .. Danger: modify at your own risk!
+ +- If an implementation of reStructuredText does not recognize a + directive (i.e., the directive-handler is not installed), a level-3 + (error) system message is generated, and the entire directive block + (including the directive itself) will be included as a literal + block. Thus "::" is a natural choice. + +The directive block consists of any text on the first line of the +directive after the directive marker, and any subsequent indented +text. The interpretation of the directive block is up to the +directive code. There are three logical parts to the directive block: + +1. Directive arguments. +2. Directive options. +3. Directive content. + +Individual directives can employ any combination of these parts. +Directive arguments can be filesystem paths, URLs, title text, etc. +Directive options are indicated using `field lists`_; the field names +and contents are directive-specific. Arguments and options must form +a contiguous block beginning on the first or second line of the +directive; a blank line indicates the beginning of the directive +content block. If arguments and/or options are employed by the +directive, a blank line must separate them from the directive content. +The "figure" directive employs all three parts:: + + .. figure:: larch.png + :scale: 50 + + The larch. + +Simple directives may not require any content. If a directive that +does not employ a content block is followed by indented text anyway, +it is an error. If a block quote should immediately follow a +directive, use an empty comment in-between (see Comments_ below). + +Actions taken in response to directives and the interpretation of text +in the directive content block or subsequent text block(s) are +directive-dependent. See `reStructuredText Directives`_ for details. + +Directives are meant for the arbitrary processing of their contents, +which can be transformed into something possibly unrelated to the +original text.
It may also be possible for directives to be used as +pragmas, to modify the behavior of the parser, such as to experiment +with alternate syntax. There is no parser support for this +functionality at present; if a reasonable need for pragma directives +is found, they may be supported. + +Directives do not generate "directive" elements; they are a *parser +construct* only, and have no intrinsic meaning outside of +reStructuredText. Instead, the parser will transform recognized +directives into (possibly specialized) document elements. Unknown +directives will trigger level-3 (error) system messages. + +Syntax diagram:: + + +-------+-------------------------------+ + | ".. " | directive type "::" directive | + +-------+ block | + | | + +-------------------------------+ + + +Substitution Definitions +```````````````````````` + +Doctree element: substitution_definition_. + +Substitution definitions are indicated by an explicit markup start +(".. ") followed by a vertical bar, the substitution text, another +vertical bar, whitespace, and the definition block. Substitution text +may not begin or end with whitespace. A substitution definition block +contains an embedded inline-compatible directive (without the leading +".. "), such as "image_" or "replace_". For example:: + + The |biohazard| symbol must be used on containers used to + dispose of medical waste. + + .. |biohazard| image:: biohazard.png + +It is an error for a substitution definition block to directly or +indirectly contain a circular substitution reference. + +`Substitution references`_ are replaced in-line by the processed +contents of the corresponding definition (linked by matching +substitution text). Matches are case-sensitive but forgiving; if no +exact match is found, a case-insensitive comparison is attempted. + +Substitution definitions allow the power and flexibility of +block-level directives_ to be shared by inline text. 
They are a way +to include arbitrarily complex inline structures within text, while +keeping the details out of the flow of text. They are the equivalent +of SGML/XML's named entities or programming language macros. + +Without the substitution mechanism, every time someone wants an +application-specific new inline structure, they would have to petition +for a syntax change. In combination with existing directive syntax, +any inline structure can be coded without new syntax (except possibly +a new directive). + +Syntax diagram:: + + +-------+-----------------------------------------------------+ + | ".. " | "|" substitution text "| " directive type "::" data | + +-------+ directive block | + | | + +-----------------------------------------------------+ + +Following are some use cases for the substitution mechanism. Please +note that most of the embedded directives shown are examples only and +have not been implemented. + +Objects + Substitution references may be used to associate ambiguous text + with a unique object identifier. + + For example, many sites may wish to implement an inline "user" + directive:: + + |Michael| and |Jon| are our widget-wranglers. + + .. |Michael| user:: mjones + .. |Jon| user:: jhl + + Depending on the needs of the site, this may be used to index the + document for later searching, to hyperlink the inline text in + various ways (mailto, homepage, mouseover Javascript with profile + and contact information, etc.), or to customize presentation of + the text (include username in the inline text, include an icon + image with a link next to the text, make the text bold or a + different color, etc.). + + The same approach can be used in documents which frequently refer + to a particular type of objects with unique identifiers but + ambiguous common names. Movies, albums, books, photos, court + cases, and laws are possible. For example:: + + |The Transparent Society| offers a fascinating alternate view + on privacy issues. + + .. 
|The Transparent Society| book:: isbn=0738201448 + + Classes or functions, in contexts where the module or class names + are unclear and/or interpreted text cannot be used, are another + possibility:: + + 4XSLT has the convenience method |runString|, so you don't + have to mess with DOM objects if all you want is the + transformed output. + + .. |runString| function:: module=xml.xslt class=Processor + +Images + Images are a common use for substitution references:: + + West led the |H| 3, covered by dummy's |H| Q, East's |H| K, + and trumped in hand with the |S| 2. + + .. |H| image:: /images/heart.png + :height: 11 + :width: 11 + .. |S| image:: /images/spade.png + :height: 11 + :width: 11 + + * |Red light| means stop. + * |Green light| means go. + * |Yellow light| means go really fast. + + .. |Red light| image:: red_light.png + .. |Green light| image:: green_light.png + .. |Yellow light| image:: yellow_light.png + + |-><-| is the official symbol of POEE_. + + .. |-><-| image:: discord.png + .. _POEE: http://www.poee.org/ + + The "image_" directive has been implemented. + +Styles [#]_ + Substitution references may be used to associate inline text with + an externally defined presentation style:: + + Even |the text in Texas| is big. + + .. |the text in Texas| style:: big + + The style name may be meaningful in the context of some particular + output format (CSS class name for HTML output, LaTeX style name + for LaTeX, etc), or may be ignored for other output formats (such + as plaintext). + + .. @@@ This needs to be rethought & rewritten or removed: + + Interpreted text is unsuitable for this purpose because the set + of style names cannot be predefined - it is the domain of the + content author, not the author of the parser and output + formatter - and there is no way to associate a style name + argument with an interpreted text style role. Also, it may be + desirable to use the same mechanism for styling blocks:: + + .. 
style:: motto + At Bob's Underwear Shop, we'll do anything to get in + your pants. + + .. style:: disclaimer + All rights reversed. Reprint what you like. + + .. [#] There may be sufficient need for a "style" mechanism to + warrant simpler syntax such as an extension to the interpreted + text role syntax. The substitution mechanism is cumbersome for + simple text styling. + +Templates + Inline markup may be used for later processing by a template + engine. For example, a Zope_ author might write:: + + Welcome back, |name|! + + .. |name| tal:: replace user/getUserName + + After processing, this ZPT output would result:: + + Welcome back, + <span tal:replace="user/getUserName">name</span>! + + Zope would then transform this to something like "Welcome back, + David!" during a session with an actual user. + +Replacement text + The substitution mechanism may be used for simple macro + substitution. This may be appropriate when the replacement text + is repeated many times throughout one or more documents, + especially if it may need to change later. A short example is + unavoidably contrived:: + + |RST|_ is a little annoying to type over and over, especially + when writing about |RST| itself, and spelling out the + bicapitalized word |RST| every time isn't really necessary for + |RST| source readability. + + .. |RST| replace:: reStructuredText + .. _RST: https://docutils.sourceforge.io/rst.html + + Note the trailing underscore in the first use of a substitution + reference. This indicates a reference to the corresponding + hyperlink target. + + Substitution is also appropriate when the replacement text cannot + be represented using other inline constructs, or is obtrusively + long:: + + But still, that's nothing compared to a name like + |j2ee-cas|__. + + .. 
|j2ee-cas| replace:: + the Java `TM`:super: 2 Platform, Enterprise Edition Client + Access Services + __ http://developer.java.sun.com/developer/earlyAccess/ + j2eecas/ + + The "replace_" directive has been implemented. + + +Comments +```````` + +Doctree element: comment_. + +`Explicit markup blocks`_ that are not recognized as citations_, +directives_, footnotes_, `hyperlink targets`_, or `substitution +definitions`_ will be processed as a comment element. Arbitrary +indented text may be used on the lines following the explicit markup +start. To ensure that none of the other explicit markup constructs is +recognized, leave the ".." on a line by itself:: + + .. This is a comment + .. + _so: is this! + .. + [and] this! + .. + this:: too! + .. + |even| this:: ! + + .. [this] however, is a citation. + +Apart from removing the maximum common indentation, no further +processing is done on the content; a comment contains a single "text +blob". Depending on the output formatter, comments may be removed +from the processed output. + +Syntax diagram:: + + +-------+----------------------+ + | ".. " | comment | + +-------+ block | + | | + +----------------------+ + +Empty Comments +.............. + +An explicit markup start followed by a blank line and nothing else +(apart from whitespace) is an "_`empty comment`". It serves to +terminate a preceding construct, and does **not** consume any indented +text following. To have a block quote follow a list or any indented +construct, insert an unindented empty comment in-between:: + + This is + a definition list. + + .. + + This is a block quote. + +Implicit Hyperlink Targets +========================== + +Implicit hyperlink targets are generated by section titles, footnotes, +and citations, and may also be generated by extension constructs. +Implicit hyperlink targets otherwise behave identically to explicit +`hyperlink targets`_. 
+ +Problems of ambiguity due to conflicting duplicate implicit and +explicit reference names are avoided by following this procedure: + +1. `Explicit hyperlink targets`_ override any implicit targets having + the same reference name. The implicit hyperlink targets are + removed, and level-1 (info) system messages are inserted. + +2. Duplicate implicit hyperlink targets are removed, and level-1 + (info) system messages inserted. For example, if two or more + sections have the same title (such as "Introduction" subsections of + a rigidly-structured document), there will be duplicate implicit + hyperlink targets. + +3. Duplicate explicit hyperlink targets are removed, and level-2 + (warning) system messages are inserted. Exception: duplicate + `external hyperlink targets`_ (identical hyperlink names and + referenced URIs) do not conflict, and are not removed. + +System messages are inserted where target links have been removed. +See "Error Handling" in `PEP 258`_. + +The parser must return a set of *unique* hyperlink targets. The +calling software (such as the Docutils_) can warn of unresolvable +links, giving reasons for the messages. + + +Inline Markup +============= + +In reStructuredText, inline markup applies to words or phrases within +a text block. The same whitespace and punctuation that serves to +delimit words in written text is used to delimit the inline markup +syntax constructs (see the `inline markup recognition rules`_ for +details). The text within inline markup may not begin or end with +whitespace. Arbitrary `character-level inline markup`_ is supported +although not encouraged. Inline markup cannot be nested. + +There are nine inline markup constructs. 
Five of the constructs use +identical start-strings and end-strings to indicate the markup: + +- emphasis_: "*" +- `strong emphasis`_: "**" +- `interpreted text`_: "`" +- `inline literals`_: "``" +- `substitution references`_: "|" + +Three constructs use different start-strings and end-strings: + +- `inline internal targets`_: "_`" and "`" +- `footnote references`_: "[" and "]_" +- `hyperlink references`_: "`" and "\`_" (phrases), or just a + trailing "_" (single words) + +`Standalone hyperlinks`_ are recognized implicitly, and use no extra +markup. + +Inline comments are not supported. + + +Inline markup recognition rules +------------------------------- + +Inline markup start-strings and end-strings are only recognized if +the following conditions are met: + +1. Inline markup start-strings must be immediately followed by + non-whitespace. + +2. Inline markup end-strings must be immediately preceded by + non-whitespace. + +3. The inline markup end-string must be separated by at least one + character from the start-string. + +4. Neither the inline markup start-string nor the end-string may be preceded + by an unescaped backslash (except for the end-string of `inline literals`_). + See `Escaping Mechanism`_ above for details. + +5. If an inline markup start-string is immediately preceded by one of the + ASCII characters ``' " < ( [ {`` or a similar + non-ASCII character [#openers]_, it must not be followed by the + corresponding closing character from ``' " > ) ] }`` or a similar + non-ASCII character [#closers]_. (For quotes, matching characters can + be any of the `quotation marks in international usage`_.) + +If the configuration setting `character-level-inline-markup`_ is False +(default), additional conditions apply to the characters "around" the +inline markup: + +6. 
Inline markup start-strings must start a text block or be + immediately preceded by + + * whitespace, + * one of the ASCII characters ``- : / ' " < ( [ {`` + * or a similar non-ASCII punctuation character. [#pre-chars]_ + +7. Inline markup end-strings must end a text block or be immediately + followed by + + * whitespace, + * one of the ASCII characters ``- . , : ; ! ? \ / ' " ) ] } >`` + * or a similar non-ASCII punctuation character. [#post-chars]_ + +.. [#openers] `Unicode categories`_ `Ps` (Open), `Pi` (Initial quote), + or `Pf` (Final quote). [#uni-version]_ +.. [#closers] Unicode categories `Pe` (Close), `Pi` (Initial quote), + or `Pf` (Final quote). [#uni-version]_ +.. [#pre-chars] Unicode categories `Ps` (Open), `Pi` (Initial quote), + `Pf` (Final quote), `Pd` (Dash), or `Po` (Other). [#uni-version]_ +.. [#post-chars] Unicode categories `Pe` (Close), `Pi` (Initial quote), + `Pf` (Final quote), `Pd` (Dash), or `Po` (Other). [#uni-version]_ + +.. [#uni-version] The category of some characters changed with the + development of the Unicode standard. + Docutils 0.13 uses `Unicode version 5.2.0`_. + +.. _Unicode categories: + https://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values +.. _Unicode version 5.2.0: https://www.unicode.org/Public/5.2.0/ +.. _quotation marks in international usage: + https://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage + +The inline markup recognition rules were devised to allow 90% of non-markup +uses of "*", "`", "_", and "|" without escaping. 
For example, none of the +following terms are recognized as containing inline markup strings: + +- 2 * x a ** b (* BOM32_* ` `` _ __ | (breaks rule 1) +- || (breaks rule 3) +- "*" '|' (*) [*] {*} <*> + ‘*’ ‚*‘ ‘*‚ ’*’ ‚*’ + “*” „*“ “*„ ”*” „*” + »*« ›*‹ «*» »*» ›*› (breaks rule 5) +- 2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b file*.* + __init__ __init__() (breaks rule 6) + +No escaping is required inside the following inline markup examples: + +- ``*2 * x *a **b *.txt*`` (breaks rule 2; renders as "*2 * x *a **b *.txt*") +- ``*2*x a**b O(N**2) e**(x*y) f(x)*f(y) a*(1+2)*`` + (breaks rule 7; renders as "*2*x a**b O(N**2) e**(x*y) f(x)*f(y) a*(1+2)*") + +It may be desirable to use `inline literals`_ for some of these anyhow, +especially if they represent code snippets. It's a judgment call. + +The following terms *do* require either literal-quoting or escaping to avoid +misinterpretation:: + + *4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt + +In most use cases, `inline literals`_ or `literal blocks`_ are the best +choice (by default, this also selects a monospaced font). Alternatively, the +inline markup characters can be escaped:: + + \*4, class\_, \*args, \**kwargs, \`TeX-quoted', \*ML, \*.txt + + +For languages that don't use whitespace between words (e.g. Japanese or +Chinese) it is recommended to set `character-level-inline-markup`_ to +True and, where necessary, escape inline markup characters. +The examples breaking rules 6 and 7 above show which constructs may need +special attention. + +.. _character-level-inline-markup: + ../../user/config.html#character-level-inline-markup + + +Recognition order +----------------- + +Inline markup delimiter characters are used for multiple constructs, +so to avoid ambiguity there must be a specific recognition order for +each character. The inline markup recognition order is as follows: + +- Asterisks: `Strong emphasis`_ ("**") is recognized before emphasis_ + ("*"). 
+ +- Backquotes: `Inline literals`_ ("``"), `inline internal targets`_ + (leading "_`", trailing "`"), are mutually independent, and are + recognized before phrase `hyperlink references`_ (leading "`", + trailing "\`_") and `interpreted text`_ ("`"). + +- Trailing underscores: Footnote references ("[" + label + "]_") and + simple `hyperlink references`_ (name + trailing "_") are mutually + independent. + +- Vertical bars: `Substitution references`_ ("|") are independently + recognized. + +- `Standalone hyperlinks`_ are the last to be recognized. + + +Character-Level Inline Markup +----------------------------- + +It is possible to mark up individual characters within a word with +backslash escapes (see `Escaping Mechanism`_ above). Backslash +escapes can be used to allow arbitrary text to immediately follow +inline markup:: + + Python ``list``\s use square bracket syntax. + +The backslash will disappear from the processed document. The word +"list" will appear as inline literal text, and the letter "s" will +immediately follow it as normal text, with no space in-between. + +Arbitrary text may immediately precede inline markup using +backslash-escaped whitespace:: + + Possible in *re*\ ``Structured``\ *Text*, though not encouraged. + +The backslashes and spaces separating "re", "Structured", and "Text" +above will disappear from the processed document. + +.. CAUTION:: + + The use of backslash-escapes for character-level inline markup is + not encouraged. Such use is ugly and detrimental to the + unprocessed document's readability. Please use this feature + sparingly and only where absolutely necessary. + + +Emphasis +-------- + +Doctree element: emphasis_. + +Start-string = end-string = "*". + +Text enclosed by single asterisk characters is emphasized:: + + This is *emphasized text*. + +Emphasized text is typically displayed in italics. + + +Strong Emphasis +--------------- + +Doctree element: strong_. + +Start-string = end-string = "**". 
+ +Text enclosed by double-asterisks is emphasized strongly:: + + This is **strong text**. + +Strongly emphasized text is typically displayed in boldface. + + +Interpreted Text +---------------- + +Doctree element: depends on the explicit or implicit role and +processing. + +Start-string = end-string = "`". + +Interpreted text is text that is meant to be related, indexed, linked, +summarized, or otherwise processed, but the text itself is typically +left alone. Interpreted text is enclosed by single backquote +characters:: + + This is `interpreted text`. + +The "role" of the interpreted text determines how the text is +interpreted. The role may be inferred implicitly (as above; the +"default role" is used) or indicated explicitly, using a role marker. +A role marker consists of a colon, the role name, and another colon. +A role name is a single word consisting of alphanumerics plus isolated +internal hyphens, underscores, plus signs, colons, and periods; +no whitespace or other characters are allowed. A role marker is +either a prefix or a suffix to the interpreted text, whichever reads +better; it's up to the author:: + + :role:`interpreted text` + + `interpreted text`:role: + +Interpreted text allows extensions to the available inline descriptive +markup constructs. To emphasis_, `strong emphasis`_, `inline +literals`_, and `hyperlink references`_, we can add "title reference", +"index entry", "acronym", "class", "red", "blinking" or anything else +we want (as long as it is a simple `reference name`_). +Only pre-determined roles are recognized; unknown roles will +generate errors. A core set of standard roles is implemented in the +reference parser; see `reStructuredText Interpreted Text Roles`_ for +individual descriptions. The role_ directive can be used to define +custom interpreted text roles. In addition, applications may support +specialized roles. 
+ +In `field lists`_, care must be taken when using interpreted text with +explicit roles in field names: the role must be a suffix to the +interpreted text. The following are recognized as field list items:: + + :`field name`:code:: interpreted text with explicit role as suffix + + :a `complex`:code:\ field name: a backslash-escaped space + is necessary + +The following are **not** recognized as field list items:: + + ::code:`not a field name`: paragraph with interpreted text + + :code:`not a field name`: paragraph with interpreted text + +Edge cases:: + + :field\:`name`: interpreted text (standard role) requires + escaping the leading colon in a field name + + :field:\`name`: not interpreted text + + +Inline Literals +--------------- + +Doctree element: literal_. + +Start-string = end-string = "``". + +Text enclosed by double-backquotes is treated as inline literals:: + + This text is an example of ``inline literals``. + +Inline literals may contain any characters except two adjacent +backquotes in an end-string context (according to the recognition +rules above). No markup interpretation (including backslash-escape +interpretation) is done within inline literals. + +Line breaks and sequences of whitespace characters +are *not* protected in inline literals. +Although a reStructuredText parser will preserve them in its output, +the final representation of the processed document depends on the +output formatter, thus the preservation of whitespace cannot be +guaranteed. If the preservation of line breaks and/or other +whitespace is important, `literal blocks`_ should be used. + +Inline literals are useful for short code snippets. For example:: + + The regular expression ``[+-]?(\d+(\.\d*)?|\.\d+)`` matches + floating-point numbers (without exponents). + + +Hyperlink References +-------------------- + +Doctree element: reference_. + +- Named hyperlink references: + + - No start-string, end-string = "_". + - Start-string = "`", end-string = "\`_". 
(Phrase references.) + +- Anonymous hyperlink references: + + - No start-string, end-string = "__". + - Start-string = "`", end-string = "\`__". (Phrase references.) + +Hyperlink references are indicated by a trailing underscore, "_", +except for `standalone hyperlinks`_ which are recognized +independently. The underscore can be thought of as a right-pointing +arrow. The trailing underscores point away from hyperlink references, +and the leading underscores point toward `hyperlink targets`_. + +Hyperlinks consist of two parts. In the text body, there is a source +link, a reference name with a trailing underscore (or two underscores +for `anonymous hyperlinks`_):: + + See the Python_ home page for info. + +A target link with a matching reference name must exist somewhere else +in the document. (See `Hyperlink Targets`_ for a full description.) + +`Anonymous hyperlinks`_ (which see) do not use reference names to +match references to targets, but otherwise behave similarly to named +hyperlinks. + + +Embedded URIs and Aliases +````````````````````````` + +A hyperlink reference may directly embed a target URI or (since +Docutils 0.11) a hyperlink reference within angle brackets ("<...>") +as follows:: + + See the `Python home page <https://www.python.org>`_ for info. + + This `link <Python home page_>`_ is an alias to the link above. + +This is exactly equivalent to:: + + See the `Python home page`_ for info. + + This link_ is an alias to the link above. + + .. _Python home page: https://www.python.org + .. _link: `Python home page`_ + +The bracketed URI must be preceded by whitespace and be the last text +before the end string. + +With a single trailing underscore, the reference is named and the same +target URI may be referred to again. +With two trailing underscores, the reference and target are both +anonymous, and the target cannot be referred to again. These are +"one-off" hyperlinks. 
For example:: + + `RFC 2396 <https://www.rfc-editor.org/rfc/rfc2396.txt>`__ and `RFC + 2732 <https://www.rfc-editor.org/rfc/rfc2732.txt>`__ together + define the syntax of URIs. + +Equivalent to:: + + `RFC 2396`__ and `RFC 2732`__ together define the syntax of URIs. + + __ https://www.rfc-editor.org/rfc/rfc2396.txt + __ https://www.rfc-editor.org/rfc/rfc2732.txt + +`Standalone hyperlinks`_ are treated as URIs, even if they end with an +underscore like in the example of a Python function documentation:: + + `__init__ <http:example.py.html#__init__>`__ + +If a target URI that is not recognized as `standalone hyperlink`_ happens +to end with an underscore, this needs to be backslash-escaped to avoid +being parsed as hyperlink reference. For example :: + + Use the `source <parrots.txt\_>`__. + +creates an anonymous reference to the file ``parrots.txt_``. + +If the reference text happens to end with angle-bracketed text that is +*not* a URI or hyperlink reference, at least one angle-bracket needs to +be backslash-escaped or an escaped space should follow. For example, here +are three references to titles describing a tag:: + + See `HTML Element: \<a>`_, `HTML Element: <b\> `_, and + `HTML Element: <c>\ `_. + +The reference text may also be omitted, in which case the URI will be +duplicated for use as the reference text. This is useful for relative +URIs where the address or file name is also the desired reference +text:: + + See `<a_named_relative_link>`_ or `<an_anonymous_relative_link>`__ + for details. + +.. CAUTION:: + + This construct offers easy authoring and maintenance of hyperlinks + at the expense of general readability. Inline URIs, especially + long ones, inevitably interrupt the natural flow of text. For + documents meant to be read in source form, the use of independent + block-level `hyperlink targets`_ is **strongly recommended**. The + embedded URI construct is most suited to documents intended *only* + to be read in processed form. 
+ + +Inline Internal Targets +------------------------ + +Doctree element: target_. + +Start-string = "_`", end-string = "`". + +Inline internal targets are the equivalent of explicit `internal +hyperlink targets`_, but may appear within running text. The syntax +begins with an underscore and a backquote, is followed by a hyperlink +name or phrase, and ends with a backquote. Inline internal targets +may not be anonymous. + +For example, the following paragraph contains a hyperlink target named +"Norwegian Blue":: + + Oh yes, the _`Norwegian Blue`. What's, um, what's wrong with it? + +See `Implicit Hyperlink Targets`_ for the resolution of duplicate +reference names. + + +Footnote References +------------------- + +See also: Footnotes_ + +Doctree element: footnote_reference_. + +Configuration settings: +`footnote_references <footnote_references setting_>`_, +trim_footnote_reference_space_. + +.. _trim_footnote_reference_space: + ../../user/config.html#trim-footnote-reference-space + +Start-string = "[", end-string = "]_". + +Each footnote reference consists of a square-bracketed label followed +by a trailing underscore. Footnote labels are one of: + +- one or more digits (i.e., a number), + +- a single "#" (denoting `auto-numbered footnotes`_), + +- a "#" followed by a simple `reference name`_ (an `autonumber label`_), + or + +- a single "*" (denoting `auto-symbol footnotes`_). + +For example:: + + Please RTFM [1]_. + + .. [1] Read The Fine Manual + +`Inline markup recognition rules`_ may require whitespace in front of the +footnote reference. To remove the whitespace from the output, use an +escaped whitespace character (see `Escaping Mechanism`_) or set the +trim_footnote_reference_space_ configuration setting. Leading whitespace +is removed by default, if the `footnote_references setting`_ is +"superscript". + + +Citation References +------------------- + +See also: Citations_ + +Doctree element: citation_reference_. + +Start-string = "[", end-string = "]_". 
+ +Each citation reference consists of a square-bracketed label followed +by a trailing underscore. Citation labels are simple `reference +names`_ (case-insensitive single words, consisting of alphanumerics +plus internal hyphens, underscores, and periods; no whitespace). + +For example:: + + Here is a citation reference: [CIT2002]_. + + +Substitution References +----------------------- + +Doctree elements: substitution_reference_, reference_. + +Start-string = "|", end-string = "|" (optionally followed by "_" or +"__"). + +Vertical bars are used to bracket the substitution reference text. A +substitution reference may also be a hyperlink reference by appending +a "_" (named) or "__" (anonymous) suffix; the substitution text is +used for the reference text in the named case. + +The processing system replaces substitution references with the +processed contents of the corresponding `substitution definitions`_ +(which see for the definition of "correspond"). Substitution +definitions produce inline-compatible elements. + +Examples:: + + This is a simple |substitution reference|. It will be replaced by + the processing system. + + This is a combination |substitution and hyperlink reference|_. In + addition to being replaced, the replacement text or element will + refer to the "substitution and hyperlink reference" target. + +.. _standalone hyperlink: + +Standalone Hyperlinks +--------------------- + +Doctree element: reference_. + +No start-string or end-string. + +A URI (absolute URI [#URI]_ or standalone email address) within a text +block is treated as a general external hyperlink with the URI itself +as the link's text. For example:: + + See https://www.python.org for info. + +would be marked up in HTML as:: + + See <a href="https://www.python.org">https://www.python.org</a> for + info. + +Two forms of URI are recognized: + +1. Absolute URIs. These consist of a scheme, a colon (":"), and a + scheme-specific part whose interpretation depends on the scheme. 
+ + The scheme is the name of the protocol, such as "http", "ftp", + "mailto", or "telnet". The scheme consists of an initial letter, + followed by letters, numbers, and/or "+", "-", ".". Recognition is + limited to known schemes, per the `Official IANA Registry of URI + Schemes`_ and the W3C's `Retired Index of WWW Addressing Schemes`_. + + The scheme-specific part of the resource identifier may be either + hierarchical or opaque: + + - Hierarchical identifiers begin with one or two slashes and may + use slashes to separate hierarchical components of the path. + Examples are web pages and FTP sites:: + + https://www.python.org + + ftp://ftp.python.org/pub/python + + - Opaque identifiers do not begin with slashes. Examples are + email addresses and newsgroups:: + + mailto:someone@somewhere.com + + news:comp.lang.python + + With queries, fragments, and %-escape sequences, URIs can become + quite complicated. A reStructuredText parser must be able to + recognize any absolute URI, as defined in RFC2396_ and RFC2732_. + +2. Standalone email addresses, which are treated as if they were + absolute URIs with a "mailto:" scheme. Example:: + + someone@somewhere.com + +Punctuation at the end of a URI is not considered part of the URI, +unless the URI is terminated by a closing angle bracket (">"). +Backslashes may be used in URIs to escape markup characters, +specifically asterisks ("*") and underscores ("_") which are valid URI +characters (see `Escaping Mechanism`_ above). + +.. [#URI] Uniform Resource Identifier. URIs are a general form of + URLs (Uniform Resource Locators). For the syntax of URIs see + RFC2396_ and RFC2732_. + + +Units +===== + +(New in Docutils 0.3.10.) + +All measures consist of a positive floating point number in standard +(non-scientific) notation and a unit, possibly separated by one or +more spaces. + +Units are only supported where explicitly mentioned in the reference +manuals. 
+ + +Length Units +------------ + +The following length units are supported by the reStructuredText +parser: + +* em (em unit, the element's font size) +* ex (ex unit, x-height of the element’s font) +* mm (millimeters; 1 mm = 1/1000 m) +* cm (centimeters; 1 cm = 10 mm) +* in (inches; 1 in = 2.54 cm = 96 px) +* px (pixels, 1 px = 1/96 in) [#]_ +* pt (points; 1 pt = 1/72 in) +* pc (picas; 1 pc = 1/6 in = 12 pt) + +This set corresponds to the `length units in CSS2`_ (a subset of `length +units in CSS3`_). + +.. [#] In LaTeX, the default definition is 1 px = 1/72 in (cf. `How to + configure the size of a pixel`_ in the LaTeX writer documentation). + +The following are all valid length values: "1.5em", "20 mm", ".5in". + +Length values without unit are completed with a writer-dependent +default (e.g. "px" with HTML, "pt" with `latex2e`). See the writer +specific documentation in the `user doc`__ for details. + +.. _length units in CSS2: + https://www.w3.org/TR/CSS2/syndata.html#length-units +.. _length units in CSS3: + https://www.w3.org/TR/css-values-3/#absolute-lengths +.. _How to configure the size of a pixel: + ../../user/latex.html#size-of-a-pixel +__ ../../user/ + + +Percentage Units +---------------- + +Percentage values have a percent sign ("%") as unit. Percentage +values are relative to other values, depending on the context in which +they occur. + + +---------------- + Error Handling +---------------- + +Doctree elements: system_message_, problematic_. + +Markup errors are handled according to the specification in `PEP +258`_. + + +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _Docutils: https://docutils.sourceforge.io/ +.. _Docutils Generic DTD: ../docutils.dtd +.. _transforms: + https://docutils.sourceforge.io/docutils/transforms/ +.. _Grouch: http://www.mems-exchange.org/software/grouch/ +.. _RFC822: https://www.rfc-editor.org/rfc/rfc822.txt +.. _DocTitle transform: +.. 
_DocInfo transform: + https://docutils.sourceforge.io/docutils/transforms/frontmatter.py +.. _getopt.py: + https://docs.python.org/3/library/getopt.html +.. _GNU libc getopt_long(): + https://www.gnu.org/software/libc/manual/html_node/Getopt-Long-Options.html +.. _doctest module: + https://docs.python.org/3/library/doctest.html +.. _Emacs table mode: http://table.sourceforge.net/ +.. _Official IANA Registry of URI Schemes: + http://www.iana.org/assignments/uri-schemes +.. _Retired Index of WWW Addressing Schemes: + https://www.w3.org/Addressing/schemes.html +.. _World Wide Web Consortium: https://www.w3.org/ +.. _HTML Techniques for Web Content Accessibility Guidelines: + https://www.w3.org/TR/WCAG10-HTML-TECHS/#link-text +.. _RFC2396: https://www.rfc-editor.org/rfc/rfc2396.txt +.. _RFC2732: https://www.rfc-editor.org/rfc/rfc2732.txt +.. _Zope: http://www.zope.com/ +.. _PEP 258: ../../peps/pep-0258.html +.. _writers: ../../peps/pep-0258.html#writers + +.. _reStructuredText Directives: directives.html +.. _admonition: directives.html#admonitions +.. _code: directives.html#code +.. _math: directives.html#math +.. _raw: directives.html#raw +.. _figure: directives.html#figure +.. _image: directives.html#image +.. _meta: directives.html#metadata +.. _replace: directives.html#replace +.. _role: directives.html#custom-interpreted-text-roles +.. _table directive: directives.html#table +.. _list table: directives.html#list-table +.. _CSV table: directives.html#csv-table +.. _custom roles: directives.html#role +.. _reStructuredText Interpreted Text Roles: roles.html +.. _"raw" role: roles.html#raw + +.. _Document Tree: +.. _The Docutils Document Tree: ../doctree.html +.. _"classes" attribute: ../doctree.html#classes +.. _topic: ../doctree.html#topic +.. _address: ../doctree.html#address +.. _author: ../doctree.html#author +.. _authors: ../doctree.html#authors +.. _contact: ../doctree.html#contact +.. _copyright: ../doctree.html#copyright +.. _date: ../doctree.html#date +.. 
_topic: ../doctree.html#topic +.. _organization: ../doctree.html#organization +.. _revision: ../doctree.html#revision +.. _status: ../doctree.html#status +.. _version: ../doctree.html#version +.. _docinfo: ../doctree.html#docinfo +.. _field: ../doctree.html#field +.. _section: ../doctree.html#section +.. _bullet_list: ../doctree.html#bullet-list +.. _list_item: ../doctree.html#list-item +.. _enumerated_list: ../doctree.html#enumerated-list +.. _list_item: ../doctree.html#list-item +.. _definition_list: ../doctree.html#definition-list +.. _definition_list_item: ../doctree.html#definition-list-item +.. _term: ../doctree.html#term +.. _classifier: ../doctree.html#classifier +.. _definition: ../doctree.html#definition +.. _field_list: ../doctree.html#field-list +.. _field_name: ../doctree.html#field-name +.. _field_body: ../doctree.html#field-body +.. _option_list: ../doctree.html#option-list +.. _option_list_item: ../doctree.html#option-list-item +.. _option_group: ../doctree.html#option-group +.. _option: ../doctree.html#option +.. _option_string: ../doctree.html#option-string +.. _option_argument: ../doctree.html#option-argument +.. _description: ../doctree.html#description +.. _line_block: ../doctree.html#line-block +.. _line: ../doctree.html#line +.. _table: ../doctree.html#table +.. _tgroup: ../doctree.html#tgroup +.. _colspec: ../doctree.html#colspec +.. _thead: ../doctree.html#thead +.. _tbody: ../doctree.html#tbody +.. _title: ../doctree.html#title +.. _row: ../doctree.html#row +.. _entry: ../doctree.html#entry +.. _identifier key: ../doctree.html#identifier-keys +.. _document element: ../doctree.html#document +.. _footnote: ../doctree.html#footnote +.. _label: ../doctree.html#label +.. _citation: ../doctree.html#citation +.. _target: ../doctree.html#target +.. _footnote_reference: ../doctree.html#footnote-reference +.. _citation_reference: ../doctree.html#citation-reference +.. _transition: ../doctree.html#transition +.. 
_paragraph: ../doctree.html#paragraph +.. _literal_block: ../doctree.html#literal-block +.. _block_quote: ../doctree.html#block-quote +.. _attribution: ../doctree.html#attribution +.. _doctest_block: ../doctree.html#doctest-block +.. _substitution_definition: ../doctree.html#substitution-definition +.. _comment: ../doctree.html#comment +.. _strong: ../doctree.html#strong +.. _literal: ../doctree.html#literal +.. _reference: ../doctree.html#reference +.. _substitution_reference: ../doctree.html#substitution-reference +.. _reference: ../doctree.html#reference +.. _reference: ../doctree.html#reference +.. _system_message: ../doctree.html#system-message +.. _problematic: ../doctree.html#problematic + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/roles.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/roles.txt new file mode 100644 index 00000000..1d077165 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/ref/rst/roles.txt @@ -0,0 +1,391 @@ +========================================= + reStructuredText Interpreted Text Roles +========================================= + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +This document describes the interpreted text roles implemented in the +reference reStructuredText parser. + +Interpreted text uses backquotes (`) around the text. An explicit +role marker may optionally appear before or after the text, delimited +with colons. For example:: + + This is `interpreted text` using the default role. + + This is :title:`interpreted text` using an explicit role. + +A default role may be defined by applications of reStructuredText; it +is used if no explicit ``:role:`` prefix or suffix is given. 
The +"default default role" is `:title-reference:`_. It can be changed +using the default-role_ directive. + +See the `Interpreted Text`_ section in the `reStructuredText Markup +Specification`_ for syntax details. For details on the hierarchy of +elements, please see `The Docutils Document Tree`_ and the `Docutils +Generic DTD`_ XML document type definition. For interpreted text role +implementation details, see `Creating reStructuredText Interpreted +Text Roles`_. + +.. _"role" directive: directives.html#role +.. _default-role: directives.html#default-role +.. _Interpreted Text: restructuredtext.html#interpreted-text +.. _reStructuredText Markup Specification: restructuredtext.html +.. _The Docutils Document Tree: ../doctree.html +.. _Docutils Generic DTD: ../docutils.dtd +.. _Creating reStructuredText Interpreted Text Roles: + ../../howto/rst-roles.html + + +.. contents:: + + +--------------- + Customization +--------------- + +Custom interpreted text roles may be defined in a document with the +`"role" directive`_. Customization details are listed with each role. + +.. _class: + +A ``class`` option is recognized by the "role" directive for most +interpreted text roles. A description__ is provided in the `"role" +directive`_ documentation. + +__ directives.html#role-class + + +---------------- + Standard Roles +---------------- + +``:emphasis:`` +============== + +:Aliases: None +:DTD Element: emphasis +:Customization: + :Options: class_. + :Content: None. + +Implements emphasis. These are equivalent:: + + *text* + :emphasis:`text` + + +``:literal:`` +============== + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_. + :Content: None. + +Implements inline literal text. These are equivalent:: + + ``text`` + :literal:`text` + +Care must be taken with backslash-escapes though. 
These are *not* +equivalent:: + + ``text \ and \ backslashes`` + :literal:`text \ and \ backslashes` + +The backslashes in the first line are preserved (and do nothing), +whereas the backslashes in the second line escape the following +spaces. + + +``:code:`` +========== + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_, language + :Content: None. + +(New in Docutils 0.9.) + +The ``code`` role marks its content as code in a formal language. + +For syntax highlighting of inline code, the `"role" directive`_ can be used to +build custom roles with the code language specified in the "language" +option. + +For example, the following creates a LaTeX-specific "latex" role:: + + .. role:: latex(code) + :language: latex + +Content of the new role is parsed and tagged by the Pygments_ syntax +highlighter. See the `code directive`_ for more info on parsing and display +of code in reStructuredText. + +In addition to "class_", the following option is recognized: + +``language`` : text + Name of the code's language. + See `supported languages and markup formats`_ for recognized values. + +.. _code directive: directives.html#code +.. _Pygments: https://pygments.org/ +.. _supported languages and markup formats: https://pygments.org/languages/ + + +``:math:`` +========== + +:Aliases: None +:DTD Element: math +:Customization: + :Options: class_ + :Content: None. + +(New in Docutils 0.8.) + +The ``math`` role marks its content as mathematical notation (inline +formula). + +The input format is LaTeX math syntax without the “math delimiters” +(``$ $``), for example:: + + The area of a circle is :math:`A_\text{c} = (\pi/4) d^2`. + +See the `math directive`_ (producing display formulas) for more info +on mathematical notation in reStructuredText. + +.. _math directive: directives.html#math + + +``:pep-reference:`` +=================== + +:Aliases: ``:PEP:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None.
+ +The ``:pep-reference:`` role is used to create an HTTP reference to a +PEP (Python Enhancement Proposal). The ``:PEP:`` alias is usually +used. The content must be a number, for example:: + + See :PEP:`287` for more information about reStructuredText. + +This is equivalent to:: + + See `PEP 287`__ for more information about reStructuredText. + + __ https://peps.python.org/pep-0287 + + +``:rfc-reference:`` +=================== + +:Aliases: ``:RFC:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None. + +The ``:rfc-reference:`` role is used to create an HTTP reference to an +RFC (Internet Request for Comments). The ``:RFC:`` alias is usually +used. The content must be a number [#]_, for example:: + + See :RFC:`2822` for information about email headers. + +This is equivalent to:: + + See `RFC 2822`__ for information about email headers. + + __ https://tools.ietf.org/html/rfc2822.html + +.. [#] You can link to a specific section by saying + ``:rfc:`number#anchor```. (New in Docutils 0.15.) + + .. Warning:: The anchor (anything following a ``#``) is appended to + the reference without any checks and not shown in the link text. + + It is recommended to use `hyperlink references`_ for + anything more complex than a single RFC number. + +.. _hyperlink references: restructuredtext.html#hyperlink-references + + +``:strong:`` +============ + +:Aliases: None +:DTD Element: strong +:Customization: + :Options: class_. + :Content: None. + +Implements strong emphasis. These are equivalent:: + + **text** + :strong:`text` + + +``:subscript:`` +=============== + +:Aliases: ``:sub:`` +:DTD Element: subscript +:Customization: + :Options: class_. + :Content: None. + +Implements subscripts. + +.. Tip:: + + Whitespace or punctuation is required around interpreted text, but + often not desired with subscripts & superscripts. 
+ Backslash-escaped whitespace can be used; the whitespace will be + removed from the processed document:: + + H\ :sub:`2`\ O + E = mc\ :sup:`2` + + In such cases, readability of the plain text can be greatly + improved with substitutions:: + + The chemical formula for pure water is |H2O|. + + .. |H2O| replace:: H\ :sub:`2`\ O + + See `the reStructuredText spec`__ for further information on + `character-level markup`__ and `the substitution mechanism`__. + + __ restructuredtext.html + __ restructuredtext.html#character-level-inline-markup + __ restructuredtext.html#substitution-references + + +``:superscript:`` +================= + +:Aliases: ``:sup:`` +:DTD Element: superscript +:Customization: + :Options: class_. + :Content: None. + +Implements superscripts. See the tip in `:subscript:`_ above. + + +``:title-reference:`` +===================== + +:Aliases: ``:title:``, ``:t:``. +:DTD Element: title_reference +:Customization: + :Options: class_. + :Content: None. + +The ``:title-reference:`` role is used to describe the titles of +books, periodicals, and other materials. It is the equivalent of the +HTML "cite" element, and it is expected that HTML writers will +typically render "title_reference" elements using "cite". + +Since title references are typically rendered with italics, they are +often marked up using ``*emphasis*``, which is misleading and vague. +The "title_reference" element provides accurate and unambiguous +descriptive markup. + +Let's assume ``:title-reference:`` is the default interpreted text +role (see below) for this example:: + + `Design Patterns` [GoF95]_ is an excellent read. + +The following document fragment (pseudo-XML_) will result from +processing:: + + <paragraph> + <title_reference> + Design Patterns + + <citation_reference refname="gof95"> + GoF95 + is an excellent read. + +``:title-reference:`` is the default interpreted text role in the +standard reStructuredText parser. This means that no explicit role is +required. 
Applications of reStructuredText may designate a different +default role, in which case the explicit ``:title-reference:`` role +must be used to obtain a ``title_reference`` element. + + +.. _pseudo-XML: ../doctree.html#pseudo-xml + + +------------------- + Specialized Roles +------------------- + +``raw`` +======= + +:Aliases: None +:DTD Element: raw +:Customization: + :Options: class_, format + :Content: None + +.. WARNING:: + + The "raw" role is a stop-gap measure allowing the author to bypass + reStructuredText's markup. It is a "power-user" feature that + should not be overused or abused. The use of "raw" ties documents + to specific output formats and makes them less portable. + + If you often need to use "raw"-derived interpreted text roles or + the "raw" directive, that is a sign either of overuse/abuse or that + functionality may be missing from reStructuredText. Please + describe your situation in a message to the Docutils-users_ mailing + list. + + .. _Docutils-users: ../../user/mailing-lists.html#docutils-user + +The "raw" role indicates non-reStructuredText data that is to be +passed untouched to the Writer. It is the inline equivalent of the +`"raw" directive`_; see its documentation for details on the +semantics. + +.. _"raw" directive: directives.html#raw-directive + +The "raw" role cannot be used directly. The `"role" directive`_ must +first be used to build custom roles based on the "raw" role. One or +more formats (Writer names) must be provided in a "format" option. + +For example, the following creates an HTML-specific "raw-html" role:: + + .. role:: raw-html(raw) + :format: html + +This role can now be used directly to pass data untouched to the HTML +Writer. For example:: + + If there just *has* to be a line break here, + :raw-html:`<br />` + it can be accomplished with a "raw"-derived role. + But the line block syntax should be considered first. + +.. 
Tip:: Roles based on "raw" should clearly indicate their origin, so + they are not mistaken for reStructuredText markup. Using a "raw-" + prefix for role names is recommended. + +In addition to "class_", the following option is recognized: + +``format`` : text + One or more space-separated output format names (Writer names). diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/config.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/config.txt new file mode 100644 index 00000000..22877ef3 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/config.txt @@ -0,0 +1,2336 @@ +======================== + Docutils Configuration +======================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. sidebar:: Docutils Security for Web Applications + + For details about securing web applications, please see `Deploying + Docutils Securely <../howto/security.html>`_. + +.. contents:: + + +------------------- +Configuration Files +------------------- + +Configuration files are used for persistent customization; +they can be set once and take effect every time you use a component, +e.g., via a `front-end tool`_. +Configuration file settings override the built-in defaults, and +command-line options override all. +For the technicalities, see `Docutils Runtime Settings`_. + +By default, Docutils checks the following places for configuration +files, in the following order: + +1. ``/etc/docutils.conf``: This is a system-wide configuration file, + applicable to all Docutils processing on the system. + +2. ``./docutils.conf``: This is a project-specific configuration file, + located in the current directory. 
The Docutils front end has to be + executed from the directory containing this configuration file for + it to take effect (note that this may have nothing to do with the + location of the source files). Settings in the project-specific + configuration file will override corresponding settings in the + system-wide file. + +3. ``~/.docutils``: This is a user-specific configuration file, + located in the user's home directory. Settings in this file will + override corresponding settings in both the system-wide and + project-specific configuration files. + +If more than one configuration file is found, all will be read but +later entries will override earlier ones. For example, a "stylesheet" +entry in a user-specific configuration file will override a +"stylesheet" entry in the system-wide file. + +The default implicit config file paths can be overridden by the +``DOCUTILSCONFIG`` environment variable. ``DOCUTILSCONFIG`` should +contain a colon-separated (semicolon-separated on Windows) sequence of +config file paths to search for; leave it empty to disable implicit +config files altogether. Tilde-expansion is performed on paths. +Paths are interpreted relative to the current working directory. +Empty path items are ignored. + +In addition, a configuration file may be explicitly specified with the +``--config`` command-line option. This configuration file is read after +the three implicit ones listed above (or the ones defined by the +``DOCUTILSCONFIG`` environment variable), and its entries will have +priority. + +.. _Docutils Runtime Settings: ../api/runtime-settings.html + + +------------------------- +Configuration File Syntax +------------------------- + +Configuration files are UTF-8-encoded text files. The +ConfigParser.py_ module from Python_'s standard library is used to +read them. 
From its documentation: + + The configuration file consists of sections, lead by a "[section]" + header and followed by "name: value" entries, with continuations + in the style of `RFC 822`_; "name=value" is also accepted. Note + that leading whitespace is removed from values. ... Lines + beginning with "#" or ";" are ignored and may be used to provide + comments. + +.. Note:: No format string interpolation is done. + +Configuration file entry names correspond to internal runtime +settings. Underscores ("_") and hyphens ("-") can be used +interchangeably in entry names; hyphens are automatically converted to +underscores. + +For on/off switch settings (_`booleans`), the following values are +recognized: + +:On: "true", "yes", "on", "1" +:Off: "false", "no", "off", "0", "" (no value) + +.. _list: + +List values can be comma- or colon-delimited. + +strip_classes_, strip_elements_with_classes_, stylesheet, and +stylesheet_path use the comma as delimiter, +whitespace around list values is stripped. :: + + strip-classes: ham,eggs, + strip-elements-with-classes: sugar, salt, flour + stylesheet: html4css1.css, + math.css, + style with spaces.css + stylesheet-path: ../styles/my.css, ../styles/funny.css + +expose_internals_, ignore_ and prune_ use the colon as delimiter and do not +strip whitespace:: + + expose_internals: b:c:d + + +Example +======= + +This is from the ``tools/docutils.conf`` configuration file supplied +with Docutils:: + + # These entries affect all processing: + [general] + source-link: yes + datestamp: %Y-%m-%d %H:%M UTC + generator: on + + # These entries affect HTML output: + [html writers] + embed-stylesheet: no + + [html4css1 writer] + stylesheet-path: docutils/writers/html4css1/html4css1.css + field-name-limit: 20 + + [html5 writer] + stylesheet-dirs: docutils/writers/html5_polyglot/ + stylesheet-path: minimal.css, responsive.css + +Individual configuration sections and settings are described in the +following section. 
+ + +------------------------------------- +Configuration File Sections & Entries +------------------------------------- + +Below are the Docutils runtime settings, listed by config file +section. **Any setting may be specified in any section, but only +settings from active sections will be used.** Sections correspond to +Docutils components (module name or alias; section names are always in +lowercase letters). Each `Docutils application`_ uses a specific set +of components; corresponding configuration file sections are applied +when the application is used. Configuration sections are applied in +general-to-specific order, as follows: + +1. `[general]`_ + +2. `[parsers]`_, parser dependencies, and the section specific to the + Parser used ("[... parser]"). + +3. `[readers]`_, reader dependencies, and the section specific to the + Reader used ("[... reader]"). For example, `[pep reader]`_ depends + on `[standalone reader]`_. + +4. `[writers]`_, writer family ("[... writers]"; if applicable), + writer dependencies, and the section specific to the writer used + ("[... writer]"). For example, `[pep_html writer]`_ depends + on `[html writers]`_ and `[html4css1 writer]`_. + +5. `[applications]`_, application dependencies, and the section + specific to the Application (front-end tool) in use + ("[... application]"). + +Since any setting may be specified in any section, this ordering +allows component- or application-specific overrides of earlier +settings. For example, there may be Reader-specific overrides of +general settings; Writer-specific overrides of Parser settings; +Application-specific overrides of Writer settings; and so on. + +If multiple configuration files are applicable, the process is +completed (all sections are applied in the order given) for each one +before going on to the next. For example, a "[pep_html writer] +stylesheet" setting in an earlier configuration file would be +overridden by an "[html4css1 writer] stylesheet" setting in a later +file. 
+ +Some knowledge of Python_ is assumed for some attributes. + +.. _ConfigParser.py: + https://docs.python.org/3/library/configparser.html +.. _Python: https://www.python.org/ +.. _RFC 822: https://www.rfc-editor.org/rfc/rfc822.txt +.. _front-end tool: +.. _Docutils application: tools.html + + +[general] +========= + +Settings in the "[general]" section are always applied. + +auto_id_prefix +-------------- + +Prefix prepended to all auto-generated `identifier keys` generated within +the document, after id_prefix_. Ensure the value conforms to the +restrictions on identifiers in the output format, as it is not subjected to +the `identifier normalization`_. + +A trailing "%" is replaced with the tag name (new in Docutils 0.16). + +Default: "%" (changed in 0.18 from "id"). +Option: ``--auto-id-prefix`` (hidden, intended mainly for programmatic use). + +.. _identifier normalization: + ../ref/rst/directives.html#identifier-normalization + +datestamp +--------- + +Include a time/datestamp in the document footer. Contains a +format string for Python's `time.strftime()`__. + +Default: None. +Options: ``--date, -d, --time, -t, --no-datestamp``. + +Configuration file entry examples:: + + # Equivalent to --date command-line option, results in + # ISO 8601 extended format datestamp, e.g. "2001-12-21": + datestamp: %Y-%m-%d + + # Equivalent to --time command-line option, results in + # date/timestamp like "2001-12-21 18:43 UTC": + datestamp: %Y-%m-%d %H:%M UTC + + # Disables datestamp; equivalent to --no-datestamp: + datestamp: + +__ https://docs.python.org/3/library/time.html#time.strftime + +debug +----- + +Report debug-level system messages. + +Default: don't (None). Options: ``--debug, --no-debug``. + +dump_internals +-------------- + +At the end of processing, write all internal attributes of the +document (``document.__dict__``) to stderr. + +Default: don't (None). +Option: ``--dump-internals`` (hidden, for development use only). 
+ +dump_pseudo_xml +--------------- + +At the end of processing, write the pseudo-XML representation of +the document to stderr. + +Default: don't (None). +Option: ``--dump-pseudo-xml`` (hidden, for development use only). + +dump_settings +------------- + +At the end of processing, write all Docutils settings to stderr. + +Default: don't (None). +Option: ``--dump-settings`` (hidden, for development use only). + +dump_transforms +--------------- + +At the end of processing, write a list of all transforms applied +to the document to stderr. + +Default: don't (None). +Option: ``--dump-transforms`` (hidden, for development use only). + +error_encoding +-------------- + +The text encoding for error output. + +Default: "ascii". Options: ``--error-encoding, -e``. + +error_encoding_error_handler +---------------------------- + +The error handler for unencodable characters in error output. See +output_encoding_error_handler_ for acceptable values. + +Default: "backslashreplace" +Options: ``--error-encoding-error-handler, --error-encoding, -e``. + +exit_status_level +----------------- + +A system message level threshold; non-halting system messages at +or above this level will produce a non-zero exit status at normal +exit. Exit status is the maximum system message level plus 10 (11 +for INFO, etc.). + +Default: disabled (5). Option: ``--exit-status``. + +expose_internals +---------------- + +List_ of internal attributes to expose as external attributes (with +"internal:" namespace prefix). To specify multiple attributes in +configuration files, use colons to separate names; on the command +line, the option may be used more than once. + +Default: don't (None). +Option: ``--expose-internal-attribute`` (hidden, for development use only). + +footnote_backlinks +------------------ + +Enable or disable backlinks from footnotes_ and citations_ to their +references. + +Default: enabled (True). +Options: ``--footnote-backlinks, --no-footnote-backlinks``. 
+ +generator +--------- + +Include a "Generated by Docutils" credit and link in the document footer. + +Default: off (None). Options: ``--generator, -g, --no-generator``. + +halt_level +---------- + +The threshold at or above which system messages are converted to +exceptions, halting execution immediately. If `traceback`_ is set, the +exception will propagate; otherwise, Docutils will exit. + +See also report_level_. + +Default: severe (4). Options: ``--halt, --strict``. + +id_prefix +--------- + +Prefix prepended to all identifier keys generated within the document. +Ensure the value conforms to the restrictions on identifiers in the output +format, as it is not subjected to the `identifier normalization`_. +See also auto_id_prefix_. + +Default: "" (empty). +Option: ``--id-prefix`` (hidden, intended mainly for programmatic use). + +input_encoding +-------------- + +The text encoding for input. + +Default: auto-detect (None). Options: ``--input-encoding, -i``. + +input_encoding_error_handler +---------------------------- + +The error handler for undecodable characters in the input. Acceptable +values include: + +strict + Raise an exception in case of an encoding error. +replace + Replace malformed data with the official Unicode replacement + character, U+FFFD. +ignore + Ignore malformed data and continue without further notice. + +Acceptable values are the same as for the "error" parameter of +Python's ``unicode`` function; other values may be defined in +applications or in future versions of Python. + +Default: "strict". +Options: ``--input-encoding-error-handler, --input-encoding, -i``. + +language_code +------------- + +Case-insensitive `language tag`_ as defined in `BCP 47`_. + +Sets the document language, also used for localized directive and +role names as well as Docutils-generated text. + +A typical language identifier consists of a 2-letter language code +from `ISO 639`_ (3-letter codes can be used if no 2-letter code +exists). 
The language identifier can have an optional subtag, +typically for variations based on country (from `ISO 3166`_ +2-letter country codes). Avoid subtags except where they add +useful distinguishing information. Examples of language tags +include "fr", "en-GB", "pt-br" (the same as "pt-BR"), and +"de-1901" (German with pre-1996 spelling). + +The language of document parts can be specified with a +"language-<language tag>" `class attribute`_, e.g. +``.. class:: language-el-polyton`` for a quote in polytonic Greek. + +Default: English ("en"). Options: ``--language, -l``. + +.. _class attribute: ../ref/doctree.html#classes + +output_encoding +--------------- + +The text encoding for output. + +Default: "UTF-8". Options: ``--output-encoding, -o``. + +output_encoding_error_handler +----------------------------- + +The error handler for unencodable characters in the output. Acceptable +values include: + +strict + Raise an exception in case of an encoding error. +replace + Replace malformed data with a suitable replacement marker, + such as "?". +ignore + Ignore malformed data and continue without further notice. +xmlcharrefreplace + Replace with the appropriate XML character reference, such as + "``†``". +backslashreplace + Replace with backslash escape sequences, such as "``\u2020``". + +Acceptable values are the same as for the "error" parameter of +Python's ``encode`` string method; other values may be defined in +applications or in future versions of Python. + +Default: "strict". +Options: ``--output-encoding-error-handler, --output-encoding, -o``. + +record_dependencies +------------------- + +Path to a file where Docutils will write a list of files that were +required to generate the output, e.g. included files or embedded +stylesheets [#dependencies]_. [#pwd]_ The format is one path per +line with forward slashes as separator, the encoding is ``utf8``. + +Set to ``-`` in order to write dependencies to stdout. 
+ +This option is particularly useful in conjunction with programs like +``make`` using ``Makefile`` rules like:: + + ham.html: ham.txt $(shell cat hamdeps.txt) + rst2html.py --record-dependencies=hamdeps.txt ham.txt ham.html + +If the filesystem encoding differs from utf8, replace the ``cat`` +command with a call to a converter, e.g.:: + + $(shell iconv -f utf8 -t latin1 hamdeps.txt) + +Default: None. Option: ``--record-dependencies``. + +.. [#dependencies] Images are only added to the dependency list if they + are embedded into the output or the reStructuredText parser extracted + image dimensions from the file. + +report_level +------------ + +Report system messages at or higher than <level>: + +1 info +2 warning +3 error +4 severe +5 none + +See also halt_level_. + +Default: warning (2). +Options: ``--report, -r, --verbose, -v, --quiet, -q``. + +sectnum_xform +------------- + +Enable or disable automatic section numbering by Docutils +(docutils.transforms.parts.SectNum) associated with the `sectnum +directive`_. + +If disabled, section numbers might be added to the output by the +renderer (e.g. by LaTeX or via a CSS style definition). + +Default: enabled (True). +Options: ``--section-numbering``, ``--no-section-numbering``. + +.. _sectnum directive: ../ref/rst/directives.html#sectnum + +source_link +----------- + +Include a "View document source" link in the document footer. URL will +be relative to the destination. + +Default: don't (None). +Options: ``--source-link, -s, --no-source-link``. + +source_url +---------- + +An explicit URL for a "View document source" link, used verbatim. + +Default: compute if source_link (None). +Options: ``--source-url, --no-source-link``. + +strict_visitor +-------------- + +When processing a document tree with the Visitor pattern, raise an +error if a writer does not support a node type listed as optional. For +transitional development use. + +Default: disabled (None). 
+Option: ``--strict-visitor`` (hidden, for development use only). + +strip_classes +------------- + +Comma-separated list_ of "classes" attribute values to remove from all +elements in the document tree. The command line option may be used more +than once. + +.. WARNING:: Potentially dangerous; use with caution. + +Default: disabled (None). Option: ``--strip-class``. + +strip_comments +-------------- + +Enable the removal of comment elements from the document tree. + +Default: disabled (None). +Options: ``--strip-comments``, ``--leave-comments``. + +strip_elements_with_classes +--------------------------- + +Comma-separated list_ of "classes" attribute values; +matching elements are removed from the document tree. +The command line option may be used more than once. + +.. WARNING:: Potentially dangerous; use with caution. + +Default: disabled (None). Option: ``--strip-element-with-class``. + +title +----- + +The `document title` as metadata which does not become part of the +document body. Stored as the document's `title attribute`_. +For example, in HTML output the metadata document title +appears in the title bar of the browser window. + +This setting overrides a displayed `document title`_ and +is overridden by a `"title" directive`_. + +Default: none. Option: ``--title``. + +.. _title attribute: ../ref/doctree.html#title-attribute +.. _document title: ../ref/rst/restructuredtext.html#document-title +.. _"title" directive: ../ref/rst/directives.html#metadata-document-title + +toc_backlinks +------------- + +Enable backlinks from section titles to table of contents entries +("entry"), to the top of the TOC ("top"), or disable ("none"). + +Default: "entry". +Options: ``--toc-entry-backlinks, --toc-top-backlinks, --no-toc-backlinks``. + +traceback +--------- + +Enable Python tracebacks when halt-level system messages and other +exceptions occur. Useful for debugging, and essential for issue +reports. 
Exceptions are allowed to propagate, instead of being +caught and reported (in a user-friendly way) by Docutils. + +Default: disabled (None) unless Docutils is run programmatically +using the `Publisher Interface`_. +Options: ``--traceback, --no-traceback``. + +.. _Publisher Interface: ../api/publisher.html + +warning_stream +-------------- + +Path to a file for the output of system messages (warnings). [#pwd]_ + +Default: stderr (None). Option: ``--warnings``. + + +[parsers] +========= + +Generic parser options: + +file_insertion_enabled +---------------------- + +Enable or disable directives or directive options that insert the contents of +external files, such as "include_" or "raw_" with option "url". +A "warning" system message (including the directive text) is inserted +instead. (See also raw_enabled_ for another security-relevant setting.) + +Default: enabled (True). +Options: ``--file-insertion-enabled, --no-file-insertion``. + +.. _include: ../ref/rst/directives.html#include +.. _raw: ../ref/rst/directives.html#raw + +line_length_limit +----------------- + +Maximal number of characters in an input line or `substitution`_ +definition. To prevent extraordinarily high processing times or memory +usage for certain input constructs, a "warning" system message is +inserted instead. + +Default: 10 000. +Option: ``--line-length-limit``. + +New in Docutils 0.17. + +.. _substitution: ../ref/rst/directives.html#substitution + +raw_enabled +----------- + +Enable or disable the "raw_" directive. A "warning" system message +(including the directive text) is inserted instead. See also +file_insertion_enabled_ for another security-relevant setting. + +Default: enabled (True). Options: ``--raw-enabled, --no-raw``. + + +[restructuredtext parser] +------------------------- + +character_level_inline_markup +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Relax the `inline markup recognition rules`_ +requiring whitespace or punctuation around inline markup.
+ +Allows character level inline markup without escaped whitespace and is +especially suited for languages that do not use whitespace to separate words +(e.g. Japanese, Chinese). + +.. WARNING:: Potentially dangerous; use with caution. + + When changing this setting to "True", inline markup characters in + URLs, names and formulas must be escaped to prevent recognition and + possible errors. Examples:: + + http://rST_for_all.html (hyperlinks to rST_ and for_) + x_2, inline_markup (hyperlinks to x_ and inline_) + 2*x (starts emphasised text) + a|b (starts a substitution reference) + +Default: disabled (False). +Options: ``--character-level-inline-markup, --word-level-inline-markup``. + +New in Docutils 0.13. + +pep_references +~~~~~~~~~~~~~~ + +Recognize and link to standalone PEP references (like "PEP 258"). + +Default: disabled (None); enabled (True) in PEP Reader. +Option: ``--pep-references``. + +pep_base_url +~~~~~~~~~~~~ +Base URL for PEP references. + +Default: "https://peps.python.org/". +Option: ``--pep-base-url``. + +pep_file_url_template +~~~~~~~~~~~~~~~~~~~~~ + +Template for PEP file part of URL, interpolated with the PEP +number and appended to pep_base_url_. + +Default: "pep-%04d". Option: ``--pep-file-url``. + +rfc_references +~~~~~~~~~~~~~~ + +Recognize and link to standalone RFC references (like "RFC 822"). + +Default: disabled (None); enabled (True) in PEP Reader. +Option: ``--rfc-references``. + +rfc_base_url +~~~~~~~~~~~~ + +Base URL for RFC references. + +Default: "http://www.faqs.org/rfcs/". Option: ``--rfc-base-url``. + +smart_quotes +~~~~~~~~~~~~ + +Activate the SmartQuotes_ transform to +change straight quotation marks to typographic form. `Quote characters`_ +are selected according to the language of the current block element (see +language_code_, smartquotes_locales_, and the `pre-defined quote sets`__).
+ +Also changes consecutive runs of hyphen-minus and full stops (``---``, +``--``, ``...``) to em-dash, en-dash, and ellipsis Unicode characters +respectively. + +Supported values: + +booleans_ (yes/no) + Use smart quotes? + +alt (or "alternative") + Use alternative quote set (if defined for the language). + +Default: "no". Option: ``--smart-quotes``. + +New in Docutils 0.10. + +.. _SmartQuotes: smartquotes.html +__ smartquotes.html#localization +.. _quote characters: + https://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks + + +smartquotes_locales +~~~~~~~~~~~~~~~~~~~ + +Typographical quotes used by the SmartQuotes_ transform. + +A comma-separated list_ with language tag and a set of four quotes (primary +open/close, secondary open/close). (If more than one +character shall be used for a quote (e.g. padding in French quotes), a +colon-separated list may be used.) + +Example: + Ensure a correct leading apostrophe in ``'s Gravenhage`` in Dutch (at the + cost of incorrect opening single quotes) and set French quotes to double + and single guillemets with inner padding:: + + smartquotes-locales: nl: „”’’, + fr: « : »:‹ : › + +Default: None. Option: ``--smartquotes-locales``. + +New in Docutils 0.14. + +syntax_highlight +~~~~~~~~~~~~~~~~ + +Token type names used by Pygments_ when parsing contents of the code_ +directive and role. + +Supported values: + +long + Use hierarchy of long token type names. +short + Use short token type names. (For use with + `Pygments-generated stylesheets`_.) +none + No code parsing. Use this to avoid the "Pygments not + found" warning when Pygments is not installed. + +Default: "long". Option: ``--syntax-highlight``. + +New in Docutils 0.9. + +.. _Pygments: https://pygments.org/ +.. _code: ../ref/rst/directives.html#code +.. _Pygments-generated stylesheets: + https://pygments.org/docs/cmdline/#generating-styles + +tab_width +~~~~~~~~~ + +Number of spaces for hard tab expansion. + +Default: 8.
Option: ``--tab-width``. + +trim_footnote_reference_space +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove spaces before `footnote references`_? + +Default: None [#]_ + +Options: ``--trim-footnote-reference-space, --leave-footnote-reference-space``. + +.. [#] Depending on the writer-specific `footnote_references setting`_. + The footnote space is trimmed if the reference style is "superscript", + and it is left if the reference style is "brackets". + + +.. _myst: + +[myst parser] +------------- + +Provided by the 3rd party package `myst-docutils`_. +See `MyST with Docutils`_ and its `Sphinx configuration options`_ +(some settings are not available with Docutils). + +.. _myst-docutils: https://pypi.org/project/myst-docutils/ +.. _MyST with Docutils: + https://myst-parser.readthedocs.io/en/latest/docutils.html +.. _Sphinx configuration options: + https://myst-parser.readthedocs.io/en/latest/sphinx/reference.html#sphinx-config-options + + +.. _pycmark: + +[pycmark parser] +---------------- + +Provided by the 3rd party package `pycmark`__. +Currently no configuration settings. + +__ https://pypi.org/project/pycmark/ + + +.. _recommonmark: + +[recommonmark parser] +--------------------- + +Provisional, depends on (deprecated) 3rd-party package recommonmark__. +Currently no configuration settings. + +__ https://pypi.org/project/recommonmark/ + + +[readers] +========= + + +[standalone reader] +------------------- + +docinfo_xform +~~~~~~~~~~~~~ + +Enable or disable the `bibliographic field list`_ transform +(docutils.transforms.frontmatter.DocInfo). + +Default: enabled (True). Options: ``--no-doc-info``. + +doctitle_xform +~~~~~~~~~~~~~~ + +Enable or disable the promotion of a lone top-level section title +to `document title`_ (and subsequent section title to document +subtitle promotion; docutils.transforms.frontmatter.DocTitle). + +Default: enabled (True). Options: ``--no-doc-title``. 
+ +sectsubtitle_xform +~~~~~~~~~~~~~~~~~~ + +Enable or disable the promotion of the title of a lone subsection +to a subtitle (docutils.transforms.frontmatter.SectSubTitle). + +Default: disabled (False). +Options: ``--section-subtitles, --no-section-subtitles``. + + +[pep reader] +------------ + +The `pep_references`_ and `rfc_references`_ settings +(`[restructuredtext parser]`_) are set on by default. + + +.. [python reader] + --------------- + + Not implemented. + + +[writers] +========= + +[docutils_xml writer] +--------------------- + +.. Caution:: + + * The XML declaration carries text encoding information. If the encoding + is not UTF-8 or ASCII and the XML declaration is missing, standard + tools may be unable to read the generated XML. + +doctype_declaration +~~~~~~~~~~~~~~~~~~~ + +Generate XML with a DOCTYPE declaration. + +Default: do (True). Options: ``--no-doctype``. + +indents +~~~~~~~ + +Generate XML with indents and newlines. + +Default: don't (None). Options: ``--indents``. + +newlines +~~~~~~~~ + +Generate XML with newlines before and after tags. + +Default: don't (None). Options: ``--newlines``. + + +.. _xml_declaration [docutils_xml writer]: + +xml_declaration +~~~~~~~~~~~~~~~ + +Generate XML with an XML declaration. +See also `xml_declaration [html writers]`_. + +Default: do (True). Option: ``--no-xml-declaration``. + + +[html writers] +-------------- + +.. _attribution [html writers]: + +attribution +~~~~~~~~~~~ + +Format for `block quote`_ attributions: one of "dash" (em-dash +prefix), "parentheses"/"parens", or "none". +See also `attribution [latex writers]`_. + +Default: "dash". Option: ``--attribution``. + + +cloak_email_addresses +~~~~~~~~~~~~~~~~~~~~~ + +Scramble email addresses to confuse harvesters. In the reference +URI, the "@" will be replaced by %-escapes (as of RFC 1738). In +the visible text (link text) of an email reference, the "@" and +all periods (".") will be surrounded by ``<span>`` tags. 
+Furthermore, HTML entities are used to encode these characters in +order to further complicate decoding the email address. For +example, "abc@example.org" will be output as:: + + <a class="reference" href="mailto:abc%40example.org"> + abc<span>@</span>example<span>.</span>org</a> + +.. Note:: While cloaking email addresses will have little to no + impact on the rendering and usability of email links in most + browsers, some browsers (e.g. the ``links`` browser) may decode + cloaked email addresses incorrectly. + +Default: don't cloak (None). Option: ``--cloak-email-addresses``. + +compact_lists +~~~~~~~~~~~~~ + +Remove extra vertical whitespace between items of `bullet lists`_ and +`enumerated lists`_, when list items are all "simple" (i.e., items +each contain one paragraph and/or one "simple" sub-list only). The +behaviour can be specified directly via "class" attributes (values +"compact" and "open") in the document. + +Default: enabled (True). +Options: ``--compact-lists, --no-compact-lists``. + +compact_field_lists +~~~~~~~~~~~~~~~~~~~ + +Remove extra vertical whitespace between items of `field lists`_ that +are "simple" (i.e., all field bodies each contain at most one +paragraph). The behaviour can be specified directly via "class" +attributes (values "compact" and "open") in the document. + +Default: enabled (True). +Options: ``--compact-field-lists, --no-compact-field-lists``. + + +.. _embed_stylesheet [html writers]: + +embed_stylesheet +~~~~~~~~~~~~~~~~ + +Embed the stylesheet in the output HTML file. The stylesheet file +must be specified by the stylesheet_path_ setting and must be +accessible during processing. +See also `embed_stylesheet [latex writers]`_. + +Default: enabled. +Options: ``--embed-stylesheet, --link-stylesheet``. + + +.. _footnote_references setting: +.. _footnote_references [html writers]: + +footnote_references +~~~~~~~~~~~~~~~~~~~ + +Format for `footnote references`_, one of "superscript" or "brackets".
+See also `footnote_references [latex writers]`_. + +Overrides [#override]_ trim_footnote_reference_space_, +if the parser supports this option. + +Default: "brackets". Option: ``--footnote-references``. + +initial_header_level +~~~~~~~~~~~~~~~~~~~~ + +The initial level for header elements. This does not affect the +document title & subtitle; see doctitle_xform_. + +Default: writer dependent (see `[html4css1 writer]`_, `[html5 writer]`_, +`[pep_html writer]`_). +Option: ``--initial-header-level``. + + +math_output +~~~~~~~~~~~ + +The format of mathematical content (`math directive`_ and role) in +the output document. Supported values are (case insensitive): + +:HTML: + Format math in standard HTML enhanced by CSS rules. + Requires the ``math.css`` stylesheet (in the system + `stylesheet directory <stylesheet_dirs [html writers]_>`__) + + A `stylesheet_path <stylesheet_path [html writers]_>`__ + can be appended after whitespace. The specified + stylesheet(s) will only be referenced or embedded if required + (i.e. if there is mathematical content in the document). + +:MathJax: + Format math for display with MathJax_, a JavaScript-based math rendering + engine. + + Pro: + Works across multiple browsers and platforms. + + Large set of `supported LaTeX math commands and constructs`__ + + __ http://docs.mathjax.org/en/latest/input/tex/macros/index.html + + Con: + Rendering requires JavaScript and an Internet connection or local + MathJax installation. + + A URL pointing to a MathJax library should be appended after whitespace. + A warning is given if this is missing. + + * It is recommended to install__ the MathJax library on the same + server as the rest of the deployed site files. + + __ https://www.mathjax.org/#installnow + + Example: Install the library at the top level of the web + server’s hierarchy in the directory ``MathJax`` and set:: + + math-output: mathjax /MathJax/MathJax.js + + * The easiest way to use MathJax is to link directly to a public + installation. 
In that case, there is no need to install MathJax locally. + + Downside: Downloads JavaScript code from a third-party site --- opens + the door to cross-site scripting attacks! + + Example: MathJax `getting started`__ documentation uses:: + + math-output: mathjax + https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + + See https://www.jsdelivr.com/ for details and terms of use. + + __ https://www.mathjax.org/#gettingstarted + + * Use a local MathJax installation on the *client* machine, e.g.:: + + math-output: MathJax file:/usr/share/javascript/mathjax/MathJax.js + + This is the fallback if no URL is specified. + +:MathML: + Embed math content as presentational MathML_. + + Pro: + The W3C recommendation for math on the web. + + Self-contained documents (no JavaScript, no external downloads). + + Con: + Limited `browser support`__. + + Docutils' latex2mathml converter supports only a + `subset of LaTeX math syntax`__. + + With the "html4css1" writer, the resulting HTML document does + not validate, as there is no DTD for `MathML + XHTML Transitional`. + However, MathML-enabled browsers will render it fine. + + __ https://developer.mozilla.org/en-US/docs/Web/MathML + #browser_compatibility + __ ../ref/rst/mathematics.html + + + An external converter can be appended after whitespace, e.g., + ``--math-output="MathML latexml"``: + + blahtexml_ + Fast conversion, support for many symbols and environments, but no + "align" (or other equation-aligning) environment. (C++) + + LaTeXML_ + Comprehensive macro support but *very* slow. (Perl) + + TtM_ + No "matrix", "align" and "cases" environments. Support may be removed. + +:LaTeX: + Include literal LaTeX code. + + The failsafe fallback. + +Default: HTML math.css. Option: ``--math-output``. + +New in Docutils 0.8. + +.. _math directive: ../ref/rst/directives.html#math +.. _MathJax: http://www.mathjax.org/ +.. _MathPlayer: http://www.dessci.com/en/products/mathplayer/ +.. _MathML: https://www.w3.org/TR/MathML/ +..
_blahtexml: http://gva.noekeon.org/blahtexml/ +.. _LaTeXML: http://dlmf.nist.gov/LaTeXML/ +.. _TtM: http://hutchinson.belmont.ma.us/tth/mml/ + + +.. _stylesheet [html writers]: + +stylesheet +~~~~~~~~~~ + +A comma-separated list of CSS stylesheet URLs, used verbatim. +See also `stylesheet [latex writers]`_. + +Overrides also stylesheet_path_. [#override]_ + +Default: None. Option: ``--stylesheet``. + + +.. _stylesheet_dirs [html writers]: + +stylesheet_dirs +~~~~~~~~~~~~~~~ + +A comma-separated list of directories where stylesheets can be found. +Used by the stylesheet_path_ setting when expanding relative path arguments. + +Note: This setting defines a "search path" (similar to the PATH variable for +executables). However, the term "path" is already used in the +stylesheet_path_ setting with the meaning of a file location. + + +Default: the working directory of the process at launch and the directory +with default stylesheet files (writer and installation specific). +Use the ``--help`` option to get the exact value. +Option: ``--stylesheet-dirs``. + + +.. _stylesheet_path: +.. _stylesheet_path [html writers]: + +stylesheet_path +~~~~~~~~~~~~~~~ + +A comma-separated list of paths to CSS stylesheets. Relative paths are +expanded if a matching file is found in the stylesheet_dirs__. +If embed_stylesheet__ is False, paths are rewritten relative to the +output HTML file. +See also `stylesheet_path [latex writers]`_. + +Also overrides "stylesheet". [#override]_ +Pass an empty string (to either "stylesheet" or "stylesheet_path") to +deactivate stylesheet inclusion. + +Default: writer dependent (see `[html4css1 writer]`_, `[html5 writer]`_, +`[pep_html writer]`_). +Option: ``--stylesheet-path``. + +__ `embed_stylesheet [html writers]`_ +__ `stylesheet_dirs [html writers]`_ + + +.. _table_style [html writers]: + +table_style +~~~~~~~~~~~ + +Class value(s) added to all tables_. +See also `table_style [latex writers]`_. 
+ +The default CSS stylesheets define: + + borderless + No borders around the table. + + align-left, align-center, align-right + Align the tables + +The HTML5 stylesheets also define: + + booktabs + Only lines above and below the table and a thin line after the head. + + captionbelow + Place the table caption below the table + (New in Docutils 0.17). + +In addition, the HTML writers support: + + colwidths-auto + Delegate the determination of table column widths to the back-end + (leave out the ``<colgroup>`` column specification). + Overridden by the "widths" option of the `table directive`_. + + colwidths-grid + Backwards compatibility setting. Write column widths + determined from the source to the HTML file. + Overridden by the "widths" option of the `table directive`_. + +Default: "". Option: ``--table-style``. + +.. _table directive: ../ref/rst/directives.html#table + + +.. _template [html writers]: + +template +~~~~~~~~ + +Path to template file, which must be encoded in UTF-8. [#pwd]_ +See also `template [latex writers]`_. + +Default: "template.txt" in the writer's directory (installed automatically; +for the exact machine-specific path, use the ``--help`` option). +Option: ``--template``. + + +.. _xml_declaration [html writers]: + +xml_declaration +~~~~~~~~~~~~~~~ + +Prepend an XML declaration. +See also `xml_declaration [docutils_xml writer]`_. + +.. Caution:: The XML declaration carries text encoding information. If the + encoding is not UTF-8 or ASCII and the XML declaration is missing, + standard XML tools may be unable to read the generated XHTML. + +Default: writer dependent. +Options: ``--xml-declaration``, ``--no-xml-declaration``. + + +[html4css1 writer] +~~~~~~~~~~~~~~~~~~ + +The `HTML4/CSS1 Writer`_ generates output that conforms to the +`XHTML 1 Transitional`_ specification. +It shares all settings defined in the `[html writers]`_ +`configuration section`_.
+ + +Writer Specific Defaults +"""""""""""""""""""""""" + +`initial_header_level`_ + 1 (for "<h1>") + +`stylesheet_path <stylesheet_path [html writers]_>`__: + "html4css1.css" + +`xml_declaration <xml_declaration [html writers]_>`__ + enabled (True) + +.. _HTML4/CSS1 Writer: html.html#html4css1 +.. _XHTML 1 Transitional: https://www.w3.org/TR/xhtml1/ + + +field_name_limit +"""""""""""""""" + +The maximum width (in characters) for one-column `field names`_. Longer +field names will span an entire row of the table used to render the field +list. 0 indicates "no limit". See also option_limit_. + +Default: 14 (i.e. 14 characters). Option: ``--field-name-limit``. + + +option_limit +"""""""""""" + +The maximum width (in characters) for options in `option lists`_. +Longer options will span an entire row of the table used to render +the option list. 0 indicates "no limit". +See also field_name_limit_. + +Default: 14 (i.e. 14 characters). Option: ``--option-limit``. + + +[html5 writer] +~~~~~~~~~~~~~~ + +The `HTML5 Writer`_ generates valid XML that is compatible with `HTML5`_. +It shares all settings defined in the `[html writers]`_ +`configuration section`_. + +New in Docutils 0.13. + +.. _HTML5 Writer: html.html#html5-polyglot +.. _HTML5: https://www.w3.org/TR/2014/REC-html5-20141028/ + +Writer Specific Defaults +"""""""""""""""""""""""" + +`initial_header_level`_ + 2 (for "<h2>", cf. the "`The h1, h2, h3, h4, h5, and h6 elements`__" + in the HTML Standard) + +`stylesheet_path <stylesheet_path [html writers]_>`__: + "minimal.css, plain.css" + +__ https://html.spec.whatwg.org/multipage/sections.html + #the-h1,-h2,-h3,-h4,-h5,-and-h6-elements + +embed_images +"""""""""""" + +Deprecated. Obsoleted by image_loading_. + + +image_loading +""""""""""""" + +Suggest at which point images should be loaded. + +:embed: If the image can be read from the local file system, + the image data is embedded into the HTML document. + +:link: Link to image in the HTML document (default). 
+ +:lazy: Link to image. Specify the `lazy loading attribute`_ to defer + fetching the image. + +Default: "link". Option: ``--image-loading``. + +New in Docutils 0.18. + +.. _base64: https://en.wikipedia.org/wiki/Base64 +.. _data URI: https://en.wikipedia.org/wiki/Data_URI_scheme +.. _lazy loading attribute: https://html.spec.whatwg.org/multipage/ + urls-and-fetching.html#lazy-loading-attributes + + +section_self_link +""""""""""""""""" + +Append an empty anchor element with a ``href`` to the section to +section headings. See ``responsive.css`` for an example how this can be +styled to show a symbol allowing users to copy the section's URL. + +Default: disabled (False). +Options: ``--section-self-link``, ``--no-section-self-link``. + +New in Docutils 0.18. + + +[pep_html writer] +~~~~~~~~~~~~~~~~~ + +The PEP/HTML Writer derives from the HTML4/CSS1 Writer, and shares +all settings defined in the `[html writers]`_ and `[html4css1 writer]`_ +`configuration sections`_. + +Writer Specific Defaults +"""""""""""""""""""""""" + +`initial_header_level`_ + 1 (for "<h1>") + +`stylesheet_path <stylesheet_path [html writers]_>`__: + "pep.css" + +`template <template [html writers]_>`__: + ``docutils/writers/pep_html/template.txt`` in the installation + directory. For the exact machine-specific path, use the ``--help`` + option. + +no_random +""""""""" +Do not use a random banner image. Mainly used to get predictable +results when testing. + +Default: random enabled (None). Options: ``--no-random`` (hidden). + +pep_home +"""""""" + +Home URL prefix for PEPs. + +Default: current directory ("."). Option: ``--pep-home``. + +python_home +""""""""""" +Python's home URL. + +Default: parent directory (".."). Option: ``--python-home``. + + +[s5_html writer] +~~~~~~~~~~~~~~~~ + +The S5/HTML Writer derives from the HTML4/CSS1 Writer, and shares +all settings defined in the `[html writers]`_ and `[html4css1 writer]`_ +`configuration sections`_. 
+ +Writer Specific Defaults +"""""""""""""""""""""""" + +compact_lists_: + disable compact lists. + +template__: + ``docutils/writers/s5_html/template.txt`` in the installation + directory. For the exact machine-specific path, use the ``--help`` + option. + +__ `template [html writers]`_ + + +hidden_controls +""""""""""""""" + +Auto-hide the presentation controls in slideshow mode, or keep +them visible at all times. + +Default: auto-hide (True). +Options: ``--hidden-controls``, ``--visible-controls``. + +current_slide +""""""""""""" + +Enable or disable the current slide indicator ("1/15"). + +Default: disabled (None). +Options: ``--current-slide``, ``--no-current-slide``. + +overwrite_theme_files +""""""""""""""""""""" + +Allow or prevent the overwriting of existing theme files in the +``ui/<theme>`` directory. This has no effect if "theme_url_" is +used. + +Default: keep existing theme files (None). +Options: ``--keep-theme-files``, ``--overwrite-theme-files``. + +theme +""""" + +Name of an installed S5 theme, to be copied into a ``ui/<theme>`` +subdirectory, beside the destination file (output HTML). Note +that existing theme files will not be overwritten; the existing +theme directory must be deleted manually. +Also overrides the "theme_url_" setting. [#override]_ + +Default: "default". Option: ``--theme``. + +theme_url +""""""""" + +The URL of an S5 theme directory. The destination file (output +HTML) will link to this theme; nothing will be copied. Also overrides +the "theme_" setting. [#override]_ + +Default: None. Option: ``--theme-url``. + +view_mode +""""""""" + +The initial view mode, either "slideshow" or "outline". + +Default: "slideshow". Option: ``--view-mode``. + +.. ------------------------------------------------------------ + +[latex writers] +---------------- + +Common settings for the `LaTeX writers`_ +`[latex2e writer]`_ and `[xetex writer]`_. + +.. _LaTeX writers: latex.html + + +..
_attribution [latex writers]: + +attribution +~~~~~~~~~~~ + +See `attribution [html writers]`_. + +compound_enumerators +~~~~~~~~~~~~~~~~~~~~ + +Enable or disable compound enumerators for nested `enumerated lists`_ +(e.g. "1.2.a.ii"). + +Default: disabled (None). +Options: ``--compound-enumerators``, ``--no-compound-enumerators``. + +documentclass +~~~~~~~~~~~~~ + +Specify LaTeX documentclass. + +Default: "article". Option: ``--documentclass``. + +documentoptions +~~~~~~~~~~~~~~~ + +Specify document options. Multiple options can be given, separated by +commas. + +Default: "a4paper". Option: ``--documentoptions``. + + +docutils_footnotes +~~~~~~~~~~~~~~~~~~ +Use the Docutils-specific macros ``\DUfootnote`` and +``\DUfootnotetext`` for footnotes_. + +TODO: The alternative, "use_latex_footnotes" is not implemented yet. + +Default: on. Option: ``--docutils-footnotes``. + + +.. _embed_stylesheet [latex writers]: + +embed_stylesheet +~~~~~~~~~~~~~~~~ + +Embed the stylesheet(s) in the header of the output file. The +stylesheets must be accessible during processing. Currently, this +fails if the file is not available via the given path (i.e. the +file is *not* searched in the `TeX input path`_). +See also `embed_stylesheet [html writers]`_. + +Default: off. Options: ``--embed-stylesheet, --link-stylesheet``. + + +.. _footnote_references [latex writers]: + +footnote_references +~~~~~~~~~~~~~~~~~~~ + +Format for `footnote references`_: one of "superscript" or "brackets". +See also `footnote_references [html writers]`_. + +Overrides [#override]_ trim_footnote_reference_space_, +if the parser supports this option. + +Default: "superscript". Option: ``--footnote-references``. + + +graphicx_option +~~~~~~~~~~~~~~~ + +LaTeX graphicx package option. + +Possible values are "dvips", "pdftex", "dvipdfmx". + +Default: "". Option: ``--graphicx-option``. + +hyperlink_color +~~~~~~~~~~~~~~~ + +Color of any hyperlinks embedded in text. + +* "0" or "false" disable coloring of links. 
(Links will be marked + by red boxes that are not printed), +* "black" results in “invisible” links. + +Set hyperref_options_ to "draft" to completely disable hyperlinking. + +Default: "blue". Option: ``--hyperlink-color``. + +hyperref_options +~~~~~~~~~~~~~~~~ + +Options for the `hyperref TeX package`_. If hyperlink_color_ is +not "false", the expansion of :: + + 'colorlinks=true,linkcolor=%s,urlcolor=%s' % ( + hyperlink_color, self.hyperlink_color) + +is prepended. + +Default: "". Option: ``--hyperref-options``. + +.. _hyperref TeX package: http://tug.org/applications/hyperref/ + + +latex_preamble +~~~~~~~~~~~~~~ + +LaTeX code that will be inserted in the document preamble. +Can be used to load packages with options or (re-) define LaTeX +macros without writing a custom style file (new in Docutils 0.7). + +Default: writer dependent (see `[latex2e writer]`_, `[xetex writer]`_). +Option: ``--latex-preamble``. + + +legacy_class_functions +~~~~~~~~~~~~~~~~~~~~~~ + +Use legacy functions ``\DUtitle`` and ``\DUadmonition`` with a +comma-separated list of class values as optional argument. If `False`, class +values are handled with wrappers and admonitions use the ``DUadmonition`` +environment. See `Generating LaTeX with Docutils`__ for details. + +Default: False (changed in Docutils 0.18). +Options: ``--legacy-class-functions``, ``--new-class-functions``. + +New in Docutils 0.17. + +__ latex.html#classes + + +legacy_column_widths +~~~~~~~~~~~~~~~~~~~~ + +Use "legacy algorithm" or new algorithm to determine table column widths. + +The new algorithm limits the table width to the text width or specified +table width and keeps the ratio of specified column widths. + +Custom table and/or column widths can be set with the respective options +of the `table directive`_. See also `Generating LaTeX with Docutils`__. + +Default: True (will change to False in 0.19). +Options: ``--legacy-column-widths``, ``--new-column-widths``. + +New in Docutils 0.18.
+ +__ latex.html#table-style + + +literal_block_env +~~~~~~~~~~~~~~~~~ + +When possible\ [#]_, use the specified environment for `literal blocks`_. + +Default: "" (quoting of whitespace and special chars). +Option: ``--literal-block-env``. + +.. [#] A literal-block element may originate from a `parsed literal`_. + A LaTeX verbatim environment is only usable if it does not contain + inline elements. + +.. _parsed literal: ../ref/rst/directives.html#parsed-literal + + +reference_label +~~~~~~~~~~~~~~~ + +Per default the latex-writer puts the reference title into +hyper references. Specify "ref*" or "pageref*" to get the section +number or the page number. + +Default: "" (use hyper references). Option: ``--reference-label``. + +section_enumerator_separator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The separator between section number prefix and enumerator for +compound enumerated lists (see `compound_enumerators`_). + +Generally it isn't recommended to use both sub-sections and nested +enumerated lists with compound enumerators. This setting avoids +ambiguity in the situation where a section "1" has a list item +enumerated "1.1", and subsection "1.1" has list item "1". With a +separator of ".", these both would translate into a final compound +enumerator of "1.1.1". With a separator of "-", we get the +unambiguous "1-1.1" and "1.1-1". + +Default: "-". Option: ``--section-enumerator-separator``. + + + +section_prefix_for_enumerators +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enable or disable section ("." subsection ...) prefixes for +compound enumerators. This has no effect unless +`compound_enumerators`_ are enabled. + +Default: disabled (None). +Options: ``--section-prefix-for-enumerators``, +``--no-section-prefix-for-enumerators``. + + +.. _stylesheet [latex writers]: + +stylesheet +~~~~~~~~~~ + +A comma-separated list_ of style files. +See also `stylesheet [html writers]`_. + +Overrides also stylesheet_path__.
[#override]_ + +If `embed_stylesheet`__ is False (default), the stylesheet files are +referenced with ``\usepackage`` (extension ``.sty`` or no extension) or +``\input`` (any other extension). + +LaTeX will search the specified files in the `TeX input path`_. + +Default: no stylesheet (""). Option: ``--stylesheet``. + +__ `stylesheet_path [latex writers]`_ +__ `embed_stylesheet [latex writers]`_ +.. _TeX input path: + http://www.tex.ac.uk/cgi-bin/texfaq2html?label=what-TDS + + +.. _stylesheet_dirs [latex writers]: + +stylesheet_dirs +~~~~~~~~~~~~~~~ + +A comma-separated list of directories where stylesheets can be found. +Used by the stylesheet_path__ setting. + +Note: This setting defines a "search path" (similar to the PATH variable for +executables). However, the term "path" is already used in the +stylesheet_path__ setting with the meaning of a file location. + +__ +__ `stylesheet_path [latex writers]`_ + +Default: the working directory of the process at launch and the directory +with default stylesheet files (writer and installation specific). +Use the ``--help`` option to get the exact value. +Option: ``--stylesheet-dirs``. + + +.. _stylesheet_path [latex writers]: + +stylesheet_path +~~~~~~~~~~~~~~~ + +A comma-separated list of style files. Relative paths are expanded if a +matching file is found in the stylesheet_dirs__. +If embed_stylesheet__ is False, paths are rewritten relative to the +output file path. Run ``latex`` from the directory containing +the output file. +See also `stylesheet_path [html writers]`_. + +The stylesheet__ option is preferred for files in the `TeX input path`_. + +Also overrides stylesheet__. [#override]_ + +Default: no stylesheet (""). Option: ``--stylesheet-path``. + +__ `stylesheet_dirs [latex writers]`_ +__ `embed_stylesheet [latex writers]`_ +__ +__ `stylesheet [latex writers]`_ + + +.. _table_style [latex writers]: + +table_style +~~~~~~~~~~~ + +Specify the default style for tables_. +See also `table_style [html writers]`_. 
+ +Supported values: "booktabs", "borderless", "colwidths-auto", and "standard". +See `Generating LaTeX with Docutils`__ for details. + +Default: "standard". Option: ``--table-style``. + +__ latex.html#tables + + +.. _template [latex writers]: + +template +~~~~~~~~ + +Path [#pwd]_ to template file, which must be encoded in UTF-8. +See also `template [html writers]`_. + +Default: writer dependent (see `[latex2e writer]`_, `[xetex writer]`_). +Option: ``--template``. + + +use_bibtex +~~~~~~~~~~ +Specify style and database for the experimental `BibTeX` support, for +example:: + + --use-bibtex=mystyle,mydb1,mydb2 + +Default: "" (don't use BibTeX). Option ``--use-bibtex``. + +use_latex_abstract +~~~~~~~~~~~~~~~~~~ + +Use LaTeX abstract environment for the document's abstract_. + +Default: off. Options: ``--use-latex-abstract, --topic-abstract``. + +use_latex_citations +~~~~~~~~~~~~~~~~~~~ + +Use \cite for citations_ instead of a simulation with figure-floats. + +Default: off. Options: ``--use-latex-citations, --figure-citations``. + +use_latex_docinfo +~~~~~~~~~~~~~~~~~ + +Attach author and date to the `document title`_ +instead of the `bibliographic fields`_. + +Default: off. Options: ``--use-latex-docinfo, --use-docutils-docinfo``. + +use_latex_toc +~~~~~~~~~~~~~ + +To get page numbers in the `table of contents`_, it +must be generated by LaTeX. Usually latex must be run twice to get +numbers correct. + +Default: on. Options: ``--use-latex-toc, --use-docutils-toc``. + +use_part_section +~~~~~~~~~~~~~~~~ + +Add parts on top of the section hierarchy. + +Default: don't (None). Option: ``--use-part-section``. + +[latex2e writer] +~~~~~~~~~~~~~~~~ + +The `LaTeX2e writer`_ generates a LaTeX source for compilation with 8-bit +LaTeX (pdfTeX_). It shares all settings defined in the `[latex writers]`_ +`configuration section`_. + +.. _LaTeX2e writer: latex.html#latex2e-writer +.. _pdfTeX: https://www.tug.org/applications/pdftex/ +.. 
_configuration section: `Configuration File Sections & Entries`_ + + +Writer Specific Defaults +"""""""""""""""""""""""" + +latex_preamble_ + Load the "PDF standard fonts" (Times, Helvetica, Courier):: + + \usepackage{mathptmx} % Times + \usepackage[scaled=.90]{helvet} + \usepackage{courier} + +template__ + "default.tex" in the ``docutils/writers/latex2e/`` directory + (installed automatically). + + __ `template [latex writers]`_ + + +font_encoding +""""""""""""" + +Specify `LaTeX font encoding`_. Multiple options can be given, separated by +commas. The last value becomes the document default. +Possible values are "", "T1", "OT1", "LGR,T1" or any other combination of +`LaTeX font encodings`_. + +Default: "T1". Option: ``--font-encoding``. + +.. _LaTeX font encoding: latex.html#font-encoding +.. _LaTeX font encodings: + http://mirror.ctan.org/macros/latex/doc/encguide.pdf + +[xetex writer] +~~~~~~~~~~~~~~ + +The `XeTeX writer`_ generates a LaTeX source for compilation with `XeTeX or +LuaTeX`_. It derives from the latex2e writer, and shares all settings +defined in the `[latex writers]`_ and `[latex2e writer]`_ `configuration +sections`_. + +.. _XeTeX writer: latex.html#xetex-writer +.. _XeTeX or LuaTeX: https://texfaq.org/FAQ-xetex-luatex +.. _configuration sections: `Configuration File Sections & Entries`_ + +Writer Specific Defaults +"""""""""""""""""""""""" + +latex_preamble_: + Font setup for `Linux Libertine`_,:: + + % Linux Libertine (free, wide coverage, not only for Linux) + \setmainfont{Linux Libertine O} + \setsansfont{Linux Biolinum O} + \setmonofont[HyphenChar=None]{DejaVu Sans Mono} + + The optional argument ``HyphenChar=None`` to the monospace font + prevents word hyphenation in literal text. + +.. _Linux Libertine: http://www.linuxlibertine.org/ + +template__: + "xelatex.tex" in the ``docutils/writers/latex2e/`` directory + (installed automatically). + + .. 
TODO: show full path with ``--help`` (like in the HTML writers) + and add the following line: + for the exact machine-specific path, use the ``--help`` option). + + __ `template [latex writers]`_ + + +[odf_odt writer] +---------------- + +The following command line options are specific to ``odtwriter``: + +stylesheet +~~~~~~~~~~ + +Specify a stylesheet URL, used verbatim. + +Default: writers/odf_odt/styles.odt in the installation directory. + +odf-config-file +~~~~~~~~~~~~~~~ + +Specify a configuration/mapping file relative to the current working +directory for additional ODF options. In particular, this file may +contain a section named "Formats" that maps default style names to names +to be used in the resulting output file allowing for adhering to external +standards. For more info and the format of the configuration/mapping +file, see the `Odt Writer for Docutils`_ document. + +cloak-email-addresses +~~~~~~~~~~~~~~~~~~~~~ + +Obfuscate email addresses to confuse harvesters while still +keeping email links usable with standards-compliant browsers. + +no-cloak-email-addresses +~~~~~~~~~~~~~~~~~~~~~~~~ +Do not obfuscate email addresses. + +table-border-thickness +~~~~~~~~~~~~~~~~~~~~~~ + +Specify the thickness of table borders in thousandths of a cm. +Default is 35. + +add-syntax-highlighting +~~~~~~~~~~~~~~~~~~~~~~~ + +Add syntax highlighting in literal code blocks. + +no-syntax-highlighting +~~~~~~~~~~~~~~~~~~~~~~ + +Do not add syntax highlighting in literal code blocks. +(default) + +create-sections +~~~~~~~~~~~~~~~ + +Create sections for headers. (default) + +no-sections +~~~~~~~~~~~ + +Do not create sections for headers. + +create-links +~~~~~~~~~~~~ +Create links. + +no-links +~~~~~~~~ + +Do not create links. (default) + +endnotes-end-doc +~~~~~~~~~~~~~~~~ + +Generate endnotes at end of document, not footnotes at bottom of page. + +no-endnotes-end-doc +~~~~~~~~~~~~~~~~~~~ + +Generate footnotes at bottom of page, not endnotes at end of +document.
(default) + +generate-list-toc +~~~~~~~~~~~~~~~~~ + +Generate a bullet list table of contents, not an +ODF/``oowriter`` table of contents. + +generate-oowriter-toc +~~~~~~~~~~~~~~~~~~~~~ + +Generate an ODF/``oowriter`` table of contents, not a bullet +list. (default) **Note:** ``odtwriter`` is not able to +determine page numbers, so you will need to open the generated +document in ``oowriter``, then right-click on the table of +contents and select "Update" to insert page numbers. + +custom-odt-header +~~~~~~~~~~~~~~~~~ + +Specify the contents of a custom header line. For details about +custom headers and about special field character sequences, see +section "Custom header/footers: inserting page numbers, date, +time, etc" in the `Odt Writer for Docutils`_ document for +details. + +custom-odt-footer +~~~~~~~~~~~~~~~~~ + +Specify the contents of a custom footer line. For details about +custom footers and about special field character sequences, see +section "Custom header/footers: inserting page numbers, date, +time, etc" in the `Odt Writer for Docutils`_ document for +details. + +.. _Odt Writer for Docutils: odt.html + + +[pseudoxml writer] +------------------ + +detailed +~~~~~~~~~ + +Pretty-print <#text> nodes. + +Default: False. Option: ``--detailed``. + + +[applications] +============== + +[buildhtml application] +----------------------- + +dry_run +~~~~~~~ + +Do not process files, show files that would be processed. + +Default: False. Option: ``--dry-run``. + +ignore +~~~~~~ + +List_ of wildcard (shell globbing) patterns, specifying files to silently +ignore. To specify multiple patterns, use colon-separated patterns (in +configuration files or on the command line); on the command line, the +option may also be used more than once. + +Default: None. Option: ``--ignore``. + +prune +~~~~~ + +List_ of directories not to process.
To specify multiple +directories, use colon-separated paths (in configuration files or +on the command line); on the command line, the option may also be +used more than once. + +Default: ['.hg', '.bzr', '.git', '.svn', 'CVS']. Option: +``--prune``. + +recurse +~~~~~~~ + +Recursively scan subdirectories, or ignore subdirectories. + +Default: recurse (True). Options: ``--recurse, --local``. + +silent +~~~~~~ + +Work silently (no progress messages). Independent of +"report_level". + +Default: show progress (None). Option: ``--silent``. + +.. _html_writer: +.. _writer [buildhtml application]: + +writer +~~~~~~ + +`HTML writer`_ version. One of "html", "html4", "html5". + +Default: "html" (use Docutils' default HTML writer). +Option: ``--writer`` + +New in 0.17. Obsoletes the ``html_writer`` option. + +.. _HTML writer: html.html + + +[docutils application] +-------------------------- + +New in 0.17. Config file support added in 0.18. +Renamed in 0.19 (the old name "docutils-cli application" is kept as alias). +Support for reader/parser import names added in 0.19. + +reader +~~~~~~ +Reader component name. +One of "standalone", "pep", +or the import name of a drop-in reader module. + +Default: "standalone". +Option: ``--reader`` + +parser +~~~~~~ +Parser component name. +Either "rst" (default) or the import name of a drop-in parser module. + +Parsers for CommonMark_ known to work with Docutils include "pycmark_", +"myst_", and "recommonmark_". + +Default: "rst". +Option: ``--parser`` + +.. _CommonMark: https://spec.commonmark.org/0.30/ + + +.. _writer [docutils application]: + +writer +~~~~~~ +Writer component name. +One of "html", "html4", "html5", "latex", "xelatex", "odt", "xml", +"pseudoxml", "manpage", "pep_html", "s5", an alias, +or the import name of a drop-in writer module. + +Default: "html5". 
+Option: ``--writer`` + + +Other Settings +============== + +Command-Line Only +----------------- + +These settings are only effective as command-line options; setting +them in configuration files has no effect. + +config +~~~~~~ + +Path to a configuration file to read (if it exists). [#pwd]_ +Settings may override defaults and earlier settings. The config +file is processed immediately. Multiple ``--config`` options may +be specified; each will be processed in turn. + +Filesystem path settings contained within the config file will be +interpreted relative to the config file's location (*not* relative +to the current working directory). + +Default: None. Option: ``--config``. + + +Internal Settings +----------------- + +These settings are for internal use only; setting them in +configuration files has no effect, and there are no corresponding +command-line options. + +_config_files +~~~~~~~~~~~~~ + +List of paths of applied configuration files. + +Default: None. No command-line options. + +_directories +~~~~~~~~~~~~ + +(``buildhtml.py`` front end.) List of paths to source +directories, set from positional arguments. + +Default: current working directory (None). No command-line +options. + +_disable_config +~~~~~~~~~~~~~~~ + +Prevent standard configuration files from being read. For +programmatic use only. + +Default: config files enabled (None). No command-line options. + +_destination +~~~~~~~~~~~~ + +Path to output destination, set from positional arguments. + +Default: stdout (None). No command-line options. + +_source +~~~~~~~ + +Path to input source, set from positional arguments. + +Default: stdin (None). No command-line options. + +-------------------------------------------------------------------------- + +.. _language tag: https://www.w3.org/International/articles/language-tags/ +.. _BCP 47: https://www.rfc-editor.org/rfc/bcp/bcp47.txt +.. _ISO 639: http://www.loc.gov/standards/iso639-2/php/English_list.php +.. 
_ISO 3166: http://www.iso.ch/iso/en/prods-services/iso3166ma/ + 02iso-3166-code-lists/index.html + +.. [#pwd] Path relative to the working directory of the process at + launch. + +.. [#override] The overridden setting will automatically be set to + ``None`` for command-line options and config file settings. Client + programs which specify defaults that override other settings must + do the overriding explicitly, by assigning ``None`` to the other + settings. + + +------------------------------ +Old-Format Configuration Files +------------------------------ + +Formerly, Docutils configuration files contained a single "[options]" +section only. This was found to be inflexible, and in August 2003 +Docutils adopted the current component-based configuration file +sections as described above. +Up to version 2.0, Docutils will still recognize the old "[options]" +section, but complain with a deprecation warning. + +To convert existing config files, the easiest way is to change the +section title: change "[options]" to "[general]". Most settings +haven't changed. The only ones to watch out for are these: + +===================== ===================================== +Old-Format Setting New Section & Setting +===================== ===================================== +pep_stylesheet [pep_html writer] stylesheet +pep_stylesheet_path [pep_html writer] stylesheet_path +pep_template [pep_html writer] template +===================== ===================================== + +.. References + +.. _abstract: +.. _bibliographic field list: +.. _bibliographic fields: + ../ref/rst/restructuredtext.html#bibliographic-fields +.. _block quote: ../ref/rst/restructuredtext.html#block-quotes +.. _citations: ../ref/rst/restructuredtext.html#citations +.. _bullet lists: ../ref/rst/restructuredtext.html#bullet-lists +.. _enumerated lists: ../ref/rst/restructuredtext.html#enumerated-lists +.. _field lists: ../ref/rst/restructuredtext.html#field-lists +.. 
_field names: ../ref/rst/restructuredtext.html#field-names +.. _footnotes: ../ref/rst/restructuredtext.html#footnotes +.. _footnote references: ../ref/rst/restructuredtext.html#footnote-references +.. _inline markup recognition rules: + ../ref/rst/restructuredtext.html#inline-markup-recognition-rules +.. _literal blocks: ../ref/rst/restructuredtext.html#literal-blocks +.. _option lists: ../ref/rst/restructuredtext.html#option-lists +.. _tables: ../ref/rst/restructuredtext.html#tables +.. _table of contents: ../ref/rst/directives.html#contents diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/emacs.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/emacs.txt new file mode 100644 index 00000000..2bf28a78 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/emacs.txt @@ -0,0 +1,960 @@ +.. -*- coding: utf-8 -*- + +======================================== + Emacs Support for reStructuredText +======================================== + +:Authors: Stefan Merten <stefan@merten-home.de>, Martin Blais + <blais@furius.ca> +:Version: ``rst.el`` V1.4.1 +:Abstract: + + High-level description of the existing Emacs_ support for editing + reStructuredText_ text documents. Suggested setup code and usage + instructions are provided. + +.. contents:: + +Introduction +============ + +reStructuredText_ is a syntax for simple text files that allows a +tool set - docutils_ - to extract generic document structure. For +people who use Emacs_, there is a package that adds a major mode that +supports editing the syntax of reStructuredText_: ``rst.el``. This +document describes the features it provides, and how to setup your +Emacs_ to use them and how to invoke them. + +Installation +============ + +Emacs_ support for reStructuredText_ is implemented as an Emacs_ major +mode (``rst-mode``) provided by the ``rst.el`` Emacs_ package. + +Emacs_ distributions contain ``rst.el`` since version V23.1. 
However, +a significantly updated version of ``rst.el`` is contained in Emacs_ +V24.3. This document describes the version of ``rst.el`` contained in +Emacs_ V24.3 and later versions. This version of ``rst.el`` has the +internal version V1.4.1. + +If you have Emacs_ V24.3 or later you do not need to install anything +to get reST support. If you have an Emacs_ between V23.1 and V24.2 you +may use the version of ``rst.el`` installed with Emacs_ or install a +more recent one locally_ (recommended). In other cases you need to +install ``rst.el`` locally_ to get reST support. + +Checking situation +------------------ + +Here are some steps to check your situation: + +#. In Emacs_ switch to an empty buffer and try :: + + M-x rst-mode + + If this works you have ``rst.el`` installed somewhere. You can see + that it works if you find a string ``ReST`` in Emacs' modeline of + the current buffer. If this doesn't work you need to install + ``rst.el`` yourself locally_. + +#. In the buffer you just switched to ``rst-mode`` try :: + + C-h v rst-version + + If this fails you have a version of ``rst.el`` older than + V1.1.0. Either you have an old ``rst.el`` locally or you are using + an Emacs_ between V23.1 and V24.2. In this case it is recommended + that you install a more recent version of ``rst.el`` locally_. + + You may also try :: + + C-h v emacs-version + + to find out your Emacs_ version. + +#. Check the version of ``rst.el`` + + The content of ``rst-version`` gives you the internal version of + ``rst.el``. The version contained in Emacs_ V24.3 and described here + is V1.4.0. If you have an older version you may or may not install + a more recent version of ``rst.el`` locally_. + +.. _locally: + +Local installation +------------------ + +If you decided to install locally please follow these steps. + +#. 
Download ``rst.el`` + + Download the most recent published version of ``rst.el`` from + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/tools/editors/emacs/rst.el + +#. Put ``rst.el`` to a directory in ``load-path`` + + Use :: + + C-h v load-path + + If in the resulting list you find a directory in your home + directory put ``rst.el`` in this directory. + + Make sure the directory is one of the first entries in + ``load-path``. Otherwise a version of ``rst.el`` which came with + Emacs_ may be found before your local version. + + In Emacs_ see the info node ``Init File Examples`` for more + information on how to set up your Emacs_ initialization + machinery. Try :: + + C-h i + mEmacs<Return> + sInit File Examples<Return> + +#. Enable ``rst-mode`` + + Add the following to your Emacs_ initialization setup :: + + (require 'rst) + + After you restarted Emacs_ ``rst.el`` is loaded and ready to be + used. + +Switching ``rst-mode`` on +------------------------- + +By default ``rst-mode`` is switched on for files ending in ``.rst`` or +``.rest``. If in a buffer you want to switch ``rst-mode`` on manually +use :: + + M-x rst-mode + +If you want to use ``rst-mode`` in files with other extensions modify +``auto-mode-alist`` to automatically turn it on whenever you visit +reStructuredText_ documents:: + + (setq auto-mode-alist + (append '(("\\.txt\\'" . rst-mode) + ("\\.rst\\'" . rst-mode) + ("\\.rest\\'" . rst-mode)) auto-mode-alist)) + +Put the extensions you want in the correct place in the example +above. Add more lines if needed. + +If you have local variables enabled (try ``C-h v enable-local-variables`` +to find out), you can also add the following at the top of your +documents to trigger rst-mode:: + + .. -*- mode: rst -*- + +Or this at the end of your documents:: + + .. + Local Variables: + mode: rst + End: + +Key bindings +============ + +``rst-mode`` automatically binds several keys for invoking special +functions for editing reStructuredText_.
Since ``rst-mode`` contains a +lot of functionality most key bindings consist of three +keystrokes. + +Following the Emacs_ conventions for major modes the key bindings of +``rst-mode`` start with ``C-c C-<letter>``. The second key stroke +selects a group of key bindings: + +C-c C-a + Commands to adjust the section headers and work with the hierarchy + they build. + +C-c C-c + Commands to compile the current reStructuredText_ document to + various output formats. + +C-c C-l + Commands to work with lists of various kinds. + +C-c C-r + Commands to manipulate the current region. + +C-c C-t + Commands to create and manipulate a table of contents. + +At any stage of typing you may use ``C-h`` to get help on the +available key bindings. I.e. ``C-c C-h`` gives you help on all key +bindings while ``C-c C-r C-h`` gives you help on the commands for +regions. This is handy if you forgot a certain key binding. + +Additional key bindings which have a certain meaning in other Emacs_ +modes are reused in ``rst-mode`` so you don't have to learn a +different set of key bindings for editing reStructuredText_. + +In ``rst-mode`` try :: + + C-h m + +to list all mode specific key bindings. Most of the key bindings are +described in this tutorial. + +.. note:: The key bindings have been completely revamped in ``rst.el`` + V1.0.0. This was necessary to make room for new + functionality. Some of the old bindings still work but give + a warning to use the new binding. In the output of ``C-h m`` + these bindings show up as ``rst-deprecated-...``. The old + bindings will be removed completely in a later version. + +Section Adornments +================== + +``rst-mode`` recognizes the section adornments building the section +hierarchy of the document. Section adornments are the underlines or +under- and overlines used to mark a section title. There are a couple +of commands to work with section adornments. These commands are bound +to key bindings starting with ``C-c C-a``. 
+ +Adjusting a Section Title +------------------------- + +There is a function that helps a great deal to maintain these +adornments: ``rst-adjust`` (bound to ``C-c C-a C-a``, ``C-c C-=``, and +``C-=``). This function is a Swiss army knife that can be invoked +repeatedly and whose behavior depends on context: + +#. If there is an incomplete adornment, e.g. :: + + My Section Title + == + + invocation will complete the adornment. It can also be used to + adjust the length of the existing adornment when you need to edit + the title. + +#. If there is no section adornment at all, by default an adornment of + the same level as the last encountered section level is added. You + can simply enter a few characters of the title and invoke the + function to create the section adornment. + + The variable ``rst-new-adornment-down`` can be customized to create + one level lower adornments than the previous section title instead + of keeping the level. + +#. If there is already a section adornment, it is promoted one level + up. You can invoke it like this repeatedly to cycle the title + through the hierarchy of existing adornments. + +Invoking the function with a negative prefix argument, e.g. ``C-- +C-=``, will effectively reverse the direction of adornment cycling. +To alternate between underline-only and over-and-under styles, you can +use a regular prefix argument, e.g. ``C-u C-=``. See the +documentation of ``rst-adjust`` for more description of the prefix +arguments to alter the behavior of the function. + +Promoting and Demoting Many Sections +------------------------------------ + +When you are re-organizing the structure of a document, it can be +useful to change the level of a number of section titles. The same +key binding can be used to do that: if the region is active when the +binding is invoked, all the section titles that are within the region +are promoted accordingly (or demoted, with negative prefix argument). 
+ +Redoing All the Adornments to Your Taste +---------------------------------------- + +If you open someone else's file and the adornments it contains are +unfamiliar, you may want to readjust them to fit your own preferred +hierarchy of adornments. This can be difficult to perform by hand. +However, you can do this easily by invoking +``rst-straighten-adornments`` (``C-c C-a C-s``), which operates on the +entire buffer. + +Customizations for Adornments +----------------------------- + +You can customize the variable ``rst-preferred-adornments`` to a list +of the adornments that you like to use for documents. + +If you prefer adornments according to +http://sphinx-doc.org/rest.html#sections you may customize it to end +up with a value like this:: + + ((35 over-and-under 0) ; ?# + (42 over-and-under 0) ; ?* + (61 simple 0) ; ?= + (45 simple 0) ; ?- + (94 simple 0) ; ?^ + (34 simple 0)) ; ?" + +This will become the default in a later version of ``rst.el``. + +If you set ``rst-preferred-adornments`` to nil resembling the empty +list only the section adornment found in the buffer will be used. + +Viewing the Hierarchy of Section Adornments +------------------------------------------- + +You can visualize the hierarchy of the section adornments in the +current buffer by invoking ``rst-display-adornments-hierarchy``, bound +on ``C-c C-a C-d``. A temporary buffer will appear with fake section +titles rendered in the style of the current document. This can be +useful when editing other people's documents to find out which section +adornments correspond to which levels. + +Movement and Selection +====================== + +Movement and Selection for Sections +----------------------------------- + +You can move the cursor between the different section titles by using +the ``rst-backward-section`` (``C-M-a``) and ``rst-forward-section`` +(``C-M-e``). To mark the section that cursor lies in, use +``rst-mark-section`` (``C-M-h``). 
+ +The key bindings are modeled after other modes with similar +functionality. + +Movements and Selection for Text Blocks +--------------------------------------- + +The understanding of reStructuredText_ of ``rst-mode`` is used to set +all the variables influencing Emacs' understanding of paragraphs. Thus +all operations on paragraphs work as usual. For instance +``forward-paragraph`` (``M-}``) works as usual. + +Indenting and Filling +===================== + +Indentation of text plays a major role in the syntax of +reStructuredText_. It is tedious to maintain the indentation +manually. ``rst-mode`` understands most of the structure of +reStructuredText_ allowing for sophisticated indentation and filling +support described in this section. + +Indenting Text Blocks +--------------------- + +``rst-mode`` supports indentation of text blocks by the command +``rst-shift-region`` (``C-c C-r TAB``). Mark a region and use ``C-c +C-r TAB`` to indent all blocks one tab to the right. Use ``M-- C-c C-r +TAB`` to indent the region one tab to the left. + +You may use arbitrary prefix arguments such as ``M-2`` or ``M-- 2`` to +determine the number of tabs you want to indent. A prefix of ``M-0`` +removes all indentation in the active region. + +A tab is an indentation making sense for the block at hand in +reStructuredText_ syntax. In some cases the exact indentation depends +on personal taste. You may customize a couple of variables ``M-x +customize-group<RET> rst-indent<RET>`` to match your taste. + +Indenting Lines While Typing +---------------------------- + +In Emacs_ the ``TAB`` key is often used for indenting the current +line. ``rst-mode`` implements this for the sophisticated indentation +rules of reStructuredText_. Pressing ``TAB`` cycles through the +possible tabs for the current line. In the same manner +``newline-and-indent`` (``C-j``) indents the new line properly. + +This is very handy while writing lists. 
Consider this +reStructuredText_ bullet list with the cursor at ``@``:: + + * Level 1 + + * Level 2@ + +Type ``C-j`` twice to get this:: + + * Level 1 + + * Level 2 + + @ + +Now you can enter text at this level, or start a new list item by +typing another ``*``. Or you may type ``TAB`` to reduce the +indentation once:: + + * Level 1 + + * Level 2 + + @ + +Typing another ``TAB`` gets you to the first level:: + + * Level 1 + + * Level 2 + + @ + +.. note:: Since Emacs_ V24.4 ``electric-indent-mode`` is globally on. + This breaks indentation in ``rst-mode`` and renders + ``rst-mode`` mostly useless. This is fixed in V1.4.1 of + ``rst-mode``. + + A quick fix for older versions of ``rst.el`` is to add the + following line at the end of the ``(define-derived-mode + rst-mode ...`` block in your copy of ``rst.el``:: + + (setq electric-indent-inhibit t) + + You may also install V1.4.1 or newer locally_. + +Filling +------- + +``rst-mode`` understanding the indentation rules of reStructuredText_ +also supports filling paragraphs. Just use ``fill-paragraph`` +(``M-q``) as you do in other modes. + +Operating on Lists +================== + +Lists are supported in various flavors in reStructuredText_. +``rst-mode`` understands reStructuredText_ lists and offers some +support for operating on lists. Key bindings for commands for +operating on lists start with ``C-c C-l``. + +Please note that so far definition lists are not explicitly supported +by ``rst-mode``. + +Bulleted and Enumerated Lists +----------------------------- + +If you have a couple of plain lines you want to turn into an +enumerated list you can invoke ``rst-enumerate-region`` (``C-c C-l +C-e``). For example, the following region :: + + Apples + + Oranges + + Bananas + +becomes :: + + 1. Apples + + 2. Oranges + + 3.
Bananas + +``rst-bullet-list-region`` (``C-c C-l C-b``) does the same, but +results in a bullet list :: + + * Apples + + * Oranges + + * Bananas + +By default, each paragraph starting on the leftmost line in the +highlighted region will be taken to be a single list or enumeration +item, for example, enumerating the following:: + + An apple a day + keeps the doctor away. + + But oranges + are tastier than apples. + + If you preferred bananas + you may be + a monkey. + +Will result in:: + + 1. An apple a day + keeps the doctor away. + + 2. But oranges + are tastier than apples. + + 3. If you preferred bananas + you may be + a monkey. + +If you would like to enumerate each of the lines, use a prefix +argument on the preceding commands, e.g.:: + + Apples + Oranges + Bananas + +becomes:: + + * Apples + * Oranges + * Bananas + +Straightening Existing Bullet List Hierarchies +---------------------------------------------- + +If you invoke ``rst-straighten-bullets-region`` (``C-c C-l C-s``), the +existing bullets in the active region will be replaced to reflect +their respective level. This does not make a difference in the +document structure that reStructuredText_ defines, but looks better +if, for example, all of the top-level bullet items use the +character ``-``, and all of the 2nd level items use ``*``, etc. + +Inserting a List Item +--------------------- + +To start a new list you may invoke ``rst-insert-list`` (``C-c C-l +C-i``). You may choose from an item style supported by +reStructuredText_. + +You may also invoke ``rst-insert-list`` at the end of a list item. In +this case it inserts a new line containing the markup for a list +item on the same level. + +Operating on Other Text Blocks +============================== + +Creating and Removing Line Blocks +--------------------------------- + +To create line blocks, first select the region to convert and invoke +``rst-line-block-region`` (``C-c C-r C-l``).
For example, the following +:: + + Apples + Oranges + Bananas + +becomes :: + + | Apples + | Oranges + | Bananas + +This works even if the region is indented. To remove line blocks, +select a region and invoke with a prefix argument. + +Commenting a Region of Text +--------------------------- + +``rst-mode`` understands reStructuredText_ comments. Use +``comment-dwim`` (``M-;``) to work on comments as usual:: + + Apples + Oranges + Bananas + +becomes:: + + .. + Apples + Oranges + Bananas + +To remove a comment you have to tell this to ``comment-dwim`` +explicitly by using a prefix argument (``C-u M-;``). + +Please note that only indented comments are supported properly by the +parts of ``comment-dwim`` working on regions. + +.. _Conversion: + +Converting Documents from Emacs +=============================== + +``rst-mode`` provides a number of functions for running documents +being edited through the docutils tools. The key bindings for these +commands start with ``C-c C-c``. + +The main generic function is ``rst-compile`` (``C-c C-c C-c``). It +invokes a compilation command with the correct output name for the +current buffer and then invokes Emacs' compile function. It also looks +for the presence of a ``docutils.conf`` configuration file in the +parent directories and adds it to the command line options. There is also +``rst-compile-alt-toolset`` (``C-c C-c C-a``) in case you often need +to run your document in a second toolset. + +You can customize the commands being used by setting +``rst-compile-primary-toolset`` and ``rst-compile-secondary-toolset``. + +Other commands are available for other formats: + +* ``rst-compile-pseudo-region`` (``C-c C-c C-x``) + + When crafting documents, it is often convenient to view which data + structures docutils will parse them into. You can use this to run the + active region through ``rst2pseudoxml.py`` and have the output + automatically be displayed in a new buffer.
+ +* ``rst-compile-pdf-preview`` (``C-c C-c C-p``) + + Convert the current document to PDF and launch a viewer on the + results. + +* ``rst-compile-slides-preview`` (``C-c C-c C-s``): Convert the + current document to S5 slides and view in a web browser. + +Imenu Support +============= + +Using Imenu +----------- + +Emacs_ has a package called ``imenu``. ``rst-mode`` supports Imenu by +adding a function to convert the structure of a reStructuredText_ +buffer to an Imenu index. Thus you can invoke ``imenu`` (``M-x +imenu``) to navigate through the section index or invoke +``imenu-add-to-menubar`` (``M-x imenu-add-to-menubar``) to add an +Imenu menu entry to Emacs' menu bar. + +Using which function +-------------------- + +As a side effect of Imenu support the ``which-func`` package is also +supported. Invoke ``which-function-mode`` (``M-x +which-function-mode``) to add the name of the current section to the +mode line. This is especially useful if you navigate through documents +with long sections which do not fit on a single screen. + +Using the Table of Contents +=========================== + +The sections in a reStructuredText_ document can be used to form a +table of contents. ``rst-mode`` can work with such a table of contents +in various forms. Key bindings for these commands start with ``C-c +C-t``. + +Navigating Using the Table of Contents +-------------------------------------- + +When you are editing long documents, it can be a bit difficult to +orient yourself in the structure of your text. To that effect, a +function is provided that presents a hierarchically indented table of +contents of the document in a temporary buffer, in which you can +navigate and press ``Return`` to go to a specific section. + +Invoke ``rst-toc`` (``C-c C-t C-t``). 
It presents a temporary buffer +that looks something like this:: + + Table of Contents: + Debugging Meta-Techniques + Introduction + Debugging Solution Patterns + Recognize That a Bug Exists + Subdivide and Isolate + Identify and Verify Assumptions + Use a Tool for Introspection + Change one thing at a time + Learn about the System + Understanding a bug + The Basic Steps in Debugging + Attitude + Bad Feelings + Good Feelings + References + +When you move the cursor to a section title and press ``RET`` or ``f`` +or click with ``button1`` on a section title, the temporary buffer +disappears and you are left with the cursor positioned at the chosen +section. Clicking with ``button2`` jumps to the respective section but +keeps the toc buffer. You can use this to look at the various section +headers quickly. Use ``q`` in this buffer to just quit it without +moving the cursor in the original document. Use ``z`` to zap the +buffer altogether. + +Inserting a Table of Contents +----------------------------- + +Oftentimes in long text documents that are meant to be read directly, +a table of contents is inserted at the beginning of the text. In +reStructuredText_ documents, since the table of contents is +automatically generated by the parser with the ``.. contents::`` +directive, people generally have not been adding an explicit table of +contents to their source documents, and partly because it is too much +trouble to edit and maintain. + +The Emacs_ support for reStructuredText_ provides a function to insert +such a table of contents in your document. Since it is not meant to +be part of the document text, you should place such a table of +contents within a comment, so that it is ignored by the parser. This +is the favored usage:: + + .. contents:: + .. 
+ 1 Introduction + 2 Debugging Solution Patterns + 2.1 Recognize That a Bug Exists + 2.2 Subdivide and Isolate + 2.3 Identify and Verify Assumptions + 2.4 Use a Tool for Introspection + 2.5 Change one thing at a time + 2.6 Learn about the System + 3 Understanding a bug + 4 The Basic Steps in Debugging + 5 Attitude + 5.1 Bad Feelings + 5.2 Good Feelings + 6 References + +Just place the cursor at the top-left corner where you want to insert +the TOC and invoke the function ``rst-toc-insert`` with ``C-c C-t +C-i``. The table of contents will display all the section titles that +are under the location where the insertion occurs. This way you can +insert local tables of contents by placing them in the appropriate +location. + +You can use a numeric prefix argument to limit the depth of rendering +of the TOC. + +You can customize the look of the TOC by setting the values of the +following variables: ``rst-toc-indent``, ``rst-toc-insert-style``, +``rst-toc-insert-max-level``. + +Maintaining the Table of Contents Up-to-date +-------------------------------------------- + +One issue is that you will probably want to maintain the inserted +table of contents up-to-date. ``rst-toc-update`` (``C-c C-t C-u``) +will automatically update an inserted table of contents following a +``.. contents::`` directive laid out like the example above. + +Syntax Highlighting via Font-Lock +================================= + +``rst-mode`` provides syntax highlighting for nearly all +reStructuredText_ constructs. + +Use ``customize-group rst-faces`` to customize the faces used for +font-locking. + +Customization +============= + +Some aspects of ``rst-mode`` can be configured through the +customization feature of Emacs_. Try :: + + M-x customize-group<RETURN>rst + +for all customizations or use the respective menu entry. Those +customizations which are useful for many people are described in this +section. 
+ +Customizing Section Title Formatting +------------------------------------ + +For a couple of things the reStructuredText_ syntax offers a choice of +options on how to do things exactly. Some of these choices influence +the operation of ``rst.el`` and thus can be configured. The +customizations are contained in the ``rst-adjust`` group. + +Among these things is the exact layout of section adornments. In fact +reStructuredText_ prescribes only the characters and how these +characters must be used but the exact use of concrete adornments may +be different in every source file. Using the customization option +``rst-preferred-adornments`` you can tell ``rst-mode`` on the exact +sequence of adornments you prefer to markup the different levels of +sections headers. + +Finally the title text of over-and-under adornments may be indented in +reStructuredText_. ``rst-default-indent`` tells ``rst-mode`` how many +positions a over-and-under adornment should be indented when toggling +from simple adornment and in case a consistent indentation throughout +the whole buffer for such adornment is needed. + +Customizing Indentation +----------------------- + +reStructuredText_ uses indentation a lot to signify a certain meaning. +In some cases the exact amount of indentation is prescribed by the +syntax while in some cases the exact indentation is not fixed. The +customization group ``rst-indent`` allows to customize the amount of +indentation in these cases. + +In field lists the content of a field needs to be indented relative to +the field label. ``rst-indent-field`` tells ``rst-mode`` the amount of +indentation to use for field content. A value of zero always indents +according to the content after the field label. + +The indentation of literal blocks is controlled by +``rst-indent-literal-normal`` and ``rst-indent-literal-minimized``. +The first is used when the leading literal tag (``::``) appears alone +on a line. 
The second is used when the minimized style is used where +the literal tag follows some text. + +The indentation of comments is controlled by ``rst-indent-comment``. +Of course this only makes sense for the indented comments of +reStructuredText_. + +Customization option ``rst-indent-width`` gives the default +indentation when there are no other hints on what amount of +indentation to use. + +Customizing Faces +----------------- + +The faces used for font-locking can be defined in the ``rst-faces`` +customization group. The customization options ending in ``-face`` are +only there for backward compatibility so please leave them as they +are. + +reStructuredText_ sets no limit on the nesting of sections. By default +there are six levels of fontification defined. Section titles deeper +than six levels have no special fontification - only the adornments are +fontified. The exact mapping from a level to a face is done by +``rst-adornment-faces-alist``, however. So if you need fontification +deeper than six levels you may want to customize this option. You may +also want to customize it if you like the general idea of section +title fontification in ``rst-mode`` but for instance prefer a reversed +order. + +Customizing Conversion +---------------------- + +Conversion_ can be customized by the customization options in the +customization group ``rst-compile``. + +If some conversion does not work as expected please check +the variable ``rst-compile-toolsets`` :: + + M-x customize-option<RETURN>rst-compile-toolsets + +This variable defines the commands and other details used for +conversion. In case of problems please check that the commands are +either available or customize them to what is available in your +environment. + +.. note:: There are some options in V1.4.1 of ``rst.el`` which should + be customization options but are not yet. Customization + support will be added in a later version. + +.. 
note:: Please note that there is a package ``rst2pdf`` based on the + ReportLab library. Please note that the command of this + package requires an additional ``-o`` for naming the output + file. This breaks the usual conventions employed by Docutils + tools. ``rst-mode`` V1.4.1 does not support this directly. + +Other Customizations +-------------------- + +``rst-preferred-bullets`` can be customized to hold your preferred set +of bullets to use for bulleted lists. + +``rst-mode-hook`` is a normal major mode hook which may be customized. +It is run if you enter ``rst-mode``. + +Related aspects +=============== + +This section covers some general aspects using Emacs_ for editing +reStructuredText_ source. They are not directly related to +``rst-mode`` but may enhance your experience. + +``text-mode`` Settings +---------------------- + +Consult the Emacs_ manual for more ``text-mode`` customizations. In +particular, you may be interested in setting the following variables, +functions and modes that pertain somewhat to ``text-mode``: + +* ``indent-tabs-mode`` +* ``colon-double-space`` +* ``sentence-end-double-space`` +* ``auto-fill-mode`` +* ``auto-mode-alist`` + +Editing Tables: Emacs table mode +-------------------------------- + +You may want to check out `Emacs table mode`_ to create and edit +tables; it allows creating ASCII tables compatible with +reStructuredText_. + +.. _Emacs table mode: http://table.sourceforge.net/ + +Character Processing +-------------------- + +Since reStructuredText punts on the issue of character processing, +here are some useful resources for Emacs_ users in the Unicode world: + +* `xmlunicode.el and unichars.el from Norman Walsh + <http://nwalsh.com/emacs/xmlchars/index.html>`__ + +* `An essay by Tim Bray, with example code + <http://www.tbray.org/ongoing/When/200x/2003/09/27/UniEmacs>`__ + +* For Emacs_ users on Mac OS X, here are some useful additions + to your .emacs file. 
+ + - To get direct keyboard input of non-ASCII characters (like + "option-e e" resulting in "é" [eacute]), first enable the option + key by setting the command key as your meta key:: + + (setq mac-command-key-is-meta t) ;; nil for option key + + Next, use one of these lines:: + + (set-keyboard-coding-system 'mac-roman) + (setq mac-keyboard-text-encoding kTextEncodingISOLatin1) + + I prefer the first line, because it enables non-Latin-1 characters + as well (em-dash, curly quotes, etc.). + + - To enable the display of all characters in the Mac-Roman charset, + first create a fontset listing the fonts to use for each range of + characters using charsets that Emacs_ understands:: + + (create-fontset-from-fontset-spec + "-apple-monaco-medium-r-normal--10-*-*-*-*-*-fontset-monaco, + ascii:-apple-monaco-medium-r-normal--10-100-75-75-m-100-mac-roman, + latin-iso8859-1:-apple-monaco-medium-r-normal--10-100-75-75-m-100-mac-roman, + mule-unicode-0100-24ff:-apple-monaco-medium-r-normal--10-100-75-75-m-100-mac-roman") + + Latin-1 doesn't cover characters like em-dash and curly quotes, so + "mule-unicode-0100-24ff" is needed. + + Next, use that fontset:: + + (set-frame-font "fontset-monaco") + + - To enable cooperation between the system clipboard and the Emacs_ + kill ring, add this line:: + + (set-clipboard-coding-system 'mac-roman) + + Other useful resources are in `Andrew Choi's Emacs 21 for Mac OS X + FAQ <http://members.shaw.ca/akochoi-emacs/stories/faq.html>`__. + +Credits +======= + +Part of the original code of ``rst.el`` has been written by Martin +Blais and David Goodger and Wei-Wei Guo. The font-locking came from +Stefan Merten. + +Most of the code has been modified, enhanced and extended by Stefan +Merten who also is the current maintainer of ``rst.el``. + +.. _Emacs: https://www.gnu.org/software/emacs/emacs.html +.. _reStructuredText: https://docutils.sourceforge.io/rst.html +.. _Docutils: https://docutils.sourceforge.io/ + + + +.. 
LocalWords: reST utf Merten Blais rst el docutils modeline emacs +.. LocalWords: Init mEmacs sInit alist setq txt overlines RET nd py +.. LocalWords: dwim conf toolset pseudoxml pdf Imenu imenu menubar +.. LocalWords: func toc xmlunicode unichars eacute charset fontset +.. LocalWords: kTextEncodingISOLatin charsets monaco ascii latin +.. LocalWords: iso unicode Choi's Goodger Guo + +.. + Local Variables: + mode: rst + indent-tabs-mode: nil + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/html.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/html.txt new file mode 100644 index 00000000..2b7f9e55 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/html.txt @@ -0,0 +1,238 @@ +===================== +Docutils HTML writers +===================== + +.. contents:: + +html +---- + +`html` is an alias for the default Docutils HTML writer. +Currently, `html` is mapped to html4css1_. + +The target may change with the development of HTML, browsers, Docutils, and +the web. + +* Use ``get_writer_by_name('html')`` or the rst2html.py_ front end, if you + want the output to be up-to-date automatically. + +* Use a specific writer name or front end, if you depend on stability of the + generated HTML code, e.g. because you use a custom style sheet or + post-processing that may break otherwise. + + +html4css1 +--------- + +:aliases: html4, html_ +:front-ends: rst2html4.py, rst2html.py_ +:config: `[html4css1 writer]`_ + +The HTML Writer module, ``docutils/writers/html4css1.py``, was the first +Docutils writer and up to release 0.13 the only official HTML writer. + +The output conforms to the `XHTML 1 Transitional`_ specification. It does +not validate as `HTML 4.01 Transitional`_ due to the closing of empty tags +required in XML but not allowed in HTML 4. However, the output follows the +`HTML Compatibility Guidelines`_ for proper rendering on most HTML user +agents. 
+ +Correct rendering depends on a CSS_ style sheet. A reference style sheet, +`html4css1.css`_, is provided and used by default. + +To support the `Internet Explorer` (with a market share of about 90% around +2002, the time this writer was written), documents contain some hard-coded +formatting hints and are tagged as "text/html" (instead of +"application/xhtml+xml"). [#IE]_ + +.. [#IE] Conformance to `CSS 2.1`_ has been added in IE 8 (2009), support + for XHTML in IE 9 (2011). + +.. _rst2html.py: tools.html#rst2html-py +.. _[html4css1 writer]: config.html#html4css1-writer +.. _html4css1.css: ../../docutils/writers/html4css1/html4css1.css + +pep_html +~~~~~~~~ + +:front-end: rstpep2html.py_ +:config: `[pep_html writer]`_ + +This is a special writer for the generation of `Python Enhancement +Proposals`_ (PEPs). It inherits from html4css1_ and adds some `PEP-specific +options`_, a style sheet and template. The front-end uses also a specialised +reader. + +.. _rstpep2html.py: tools.html#rstpep2html-py +.. _PEP-specific options: +.. _[pep_html writer]: config.html#pep-html-writer +.. _Python Enhancement Proposals: https://peps.python.org/ + +s5_html +~~~~~~~ + +:alias: s5 +:front-end: rst2s5.py_ +:config: `[s5_html writer]`_ + +The `s5` writer inherits from html4css1_. It produces XHTML for use with +S5_, the “Simple Standards-based Slide Show System” by Eric Meyer. See +`Easy Slide Shows With reST & S5`_ for details. + +.. _rst2s5.py: tools.html#rst2s5-py +.. _[s5_html writer]: config.html#s5-html-writer +.. _Easy Slide Shows With reST & S5: slide-shows.html +.. _S5: http://meyerweb.com/eric/tools/s5/ +.. _theme: tools.html#themes + + +html5_polyglot +-------------- + +:aliases: html5 +:front-end: rst2html5.py_ +:config: `[html5 writer]`_ + +The ``html5_polyglot`` writer generates `polyglot HTML`_ [#]_ output, valid +XML [#safetext]_ that is compatible with `HTML5`_. New features and elements +are used if they are widely supported. 
+ +There is no hard-coded formatting information in the HTML document. +Correct rendering of elements not directly supported by HTML depends on a +CSS_ style sheet. The provided style sheet minimal.css_ defines required +styling rules; plain.css_ and responsive.css_ add optional rules for +better legibility. Adaption of the layout is possible with `custom style +sheets`_. [#safetext]_ + +New in Docutils 0.13 + +.. [#] see also `Benefits of polyglot XHTML5`_ +.. [#safetext] The validity of raw HTML and custom stylesheets must be + ensured by the author. + +.. _rst2html5.py: tools.html#rst2html5-py +.. _[html5 writer]: config.html#html5-writer +.. _minimal.css: ../../docutils/writers/html5_polyglot/minimal.css +.. _plain.css: ../../docutils/writers/html5_polyglot/plain.css +.. _responsive.css: ../../docutils/writers/html5_polyglot/responsive.css +.. _custom style sheets: ../howto/html-stylesheets.html +.. _viewable with any browser: http://www.anybrowser.org/campaign +.. _Benefits of polyglot XHTML5: http://xmlplease.com/xhtml/xhtml5polyglot/ + + +Overview +-------- + +================ =========== ============== ================= =========== +name aliases `front-end`_ HTML version CSS version +================ =========== ============== ================= =========== +html4css1_ html4, rst2html4.py, `XHTML 1 `CSS 1`_ + html_ rst2html.py Transitional`_ + +pep_html_ .. rstpep2html.py `XHTML 1 `CSS 1`_ + Transitional`_ + +s5_html_ s5 rst2s5.py `XHTML 1 `CSS 1`_ + Transitional`_ + +html5_polyglot_ html5 rst2html5.py `HTML5`_ `CSS 3`_ + +================ =========== ============== ================= =========== + +For additional alternatives, see the `Docutils link list`__ and the +sandbox_. + +__ https://docutils.sourceforge.io/docs/user/links.html + #website-generators-and-html-variants +.. 
_sandbox: ../dev/policies.html#the-sandbox + + +References +---------- + +_`HTML5` + `HTML5, A vocabulary and associated APIs for HTML and XHTML`, + W3C Recommendation, 28 October 2014. + https://www.w3.org/TR/2014/REC-html5-20141028/ + +_`XHTML 1.1` + `XHTML™ 1.1 - Module-based XHTML - Second Edition`, + W3C Recommendation, 23 November 2010. + https://www.w3.org/TR/xhtml11/ + +_`XHTML 1 Transitional` + `Transitional version`_ of: + `XHTML™ 1.0 The Extensible HyperText Markup Language (Second + Edition)`, `A Reformulation of HTML 4 in XML 1.0`, + W3C Recommendation, 26 January 2000, revised 1 August 2002. + https://www.w3.org/TR/xhtml1/ + +_`XHTML Basic` + `XHTML™ Basic 1.1 - Second Edition`, + W3C Recommendation, 23 November 2010. + https://www.w3.org/TR/xhtml-basic/ + +.. _transitional version: + https://www.w3.org/TR/xhtml1/#a_dtd_XHTML-1.0-Transitional + +_`HTML 4.01 Transitional` + Transitional version of: + `HTML 4.01 Specification`, W3C Recommendation 24 December 1999. + https://www.w3.org/TR/html4/ + +.. _`CSS 1`: + +_`CSS Level 1`: + The features defined in the `CSS1 specification`_, but using the syntax + and definitions in the `CSS 2.1`_ specification. + +_`CSS 2.1` `Cascading Style Sheets Level 2 Revision 1 (CSS 2.1) Specification`, + W3C Recommendation 07 June 2011. + https://www.w3.org/TR/CSS21/ + +_`CSS 3`: + CSS Level 3 builds on CSS Level 2 module by module, using the CSS2.1 + specification as its core. + + Specifications: https://www.w3.org/Style/CSS/specs.en.html + + Validator: http://jigsaw.w3.org/css-validator/ + +.. other references + ---------------- + +.. _HTML Compatibility Guidelines: https://www.w3.org/TR/xhtml1/#guidelines +.. _CSS: https://www.w3.org/TR/CSS/ +.. _CSS1 specification: https://www.w3.org/TR/2008/REC-CSS1-20080411/ +.. _polyglot HTML: https://www.w3.org/TR/html-polyglot/ + + .. Beware. This specification is no longer in active maintenance and the + HTML Working Group does not intend to maintain it further. + +.. 
Appendix + + + On the question of Polyglot markup, there seems to be little + consensus. One line of argument suggests that, to the extent that it + is practical to obey the Robustness principle, it makes sense to do + so. That is, if you're generating HTML markup for the web, and you can + generate Polyglot markup that is also directly consumable as XML, you + should do so. Another line of argument suggests that even under the + most optimistic of projections, so tiny a fraction of the web will + ever be written in Polyglot that there's no practical benefit to + pursuing it as a general strategy for consuming documents from the + web. If you want to consume HTML content, use an HTML parser that + produces an XML-compatible DOM or event stream. + + -- https://www.w3.org/TR/html-xml-tf-report/#conclusions + + Further development + + On 2016-05-25, David Goodger wrote: + + > In addition, I'd actually like to see the HTML writer(s) with + > fully-parameterized classes, i.e. removing hard-coded *classes* as well as + > formatting. This way, any user who wants to (e.g.) write reST for use with + > Bootstrap can easily work around any naming conflicts. + +.. 
_front-end: tools.html diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/big-black.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/big-black.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/big-white.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/big-white.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/default.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/default.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/happy_monkey.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/happy_monkey.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/medium-black.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/medium-black.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/medium-white.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/medium-white.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-all.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-all.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-breaks.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-breaks.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-covers.png 
b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-covers.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-cuts.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-cuts.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-empty.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-empty.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-objects.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp-objects.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/rsp.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/s5-files.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/s5-files.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/s5-files.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/s5-files.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/small-black.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/small-black.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/small-white.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/images/small-white.png new file mode 100644 index 00000000..e69de29b diff --git 
a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/latex.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/latex.txt new file mode 100644 index 00000000..c3a2b3c6 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/latex.txt @@ -0,0 +1,2170 @@ +================================ + Generating LaTeX with Docutils +================================ + +:Author: Engelbert Gruber, Günter Milde +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. +:Abstract: This document covers topics specific to Docutils' LaTeX_ export. + +.. contents:: +.. sectnum:: + + +LaTeX +===== + +LaTeX__, is a document preparation system for high-quality typesetting. It +is most often used for medium-to-large technical or scientific documents but +it can be used for almost any form of publishing. There exists a wide +selection of `LaTeX Documentation on the net`_ and `books on LaTeX and +related topics`_. For an introduction to LaTeX see, e.g., `LaTeX2e for +authors`_. + +__ http://www.latex-project.org/ +.. _LaTeX2e for authors: + http://www.latex-project.org/guides/usrguide.pdf +.. _LaTeX Documentation on the net: + http://www.latex-project.org/guides/ +.. _books on LaTeX and related topics: + http://www.latex-project.org/guides/books.html + + +.. _LaTeX packages: + +LaTeX document classes and packages +----------------------------------- + +Unlike HTML with CSS, LaTeX uses one common language for markup and style +definitions. Separation of content and style is realized by collecting style +definitions in LaTeX classes and packages, or the +`document preamble <LaTeX preamble_>`_. + +LaTeX document classes and packages (similar to Python modules or C +libraries) provide means to extend or modify the LaTeX language by +redefining macros or providing new ones. 
+ +Using the `document class`_ and `style sheet`_ configuration options, you +can select from a *huge* selection of classes and packages (standard as well +as user contributed) coming with your TeX distribution or available at +CTAN_ as well as custom style sheets. + +.. _CTAN: http://www.ctan.org + + +Docutils specific LaTeX macros +------------------------------ + +Some Docutils objects have no LaTeX counterpart, they will be typeset +using a Docutils specific LaTeX *macro* (command, environment, or +length) to allow customization. By convention, special macros use the +prefix ``\DU``\ [#]_. + +The `docutils.sty`_ LaTeX package providing required definitions is +part of Docutils ≥ 0.17 and available on CTAN since 2020-09-04. +The generated LaTeX documents should be kept processable by a standard LaTeX +installation. Therefore fallback definitions are included after the `custom +style sheets`_, if a macro is required in the document and +the `stylesheet`_ setting does not include "docutils". + +* Custom `style sheets`_ can define alternative implementations with + ``\newcommand``, ``\newenvironment``, and ``\newlength`` followed by + ``\setlength``. + +* Definitions with `raw LaTeX`_ are part of the document body. Use + ``\def``, ``\renewcommand`` or ``\renewenvironment``, and ``\setlength``. + +See the test output standalone_rst_latex.tex_ for an example of the fallback +definitions and their use in the document. + +.. [#] DU for Documentation Utilities = Docutils + +.. _docutils.sty: https://ctan.org/pkg/docutils + + + +Length units +------------ + +LaTeX supports all `length units`_ defined for Docutils plus the +following less common units: + +:pt: typewriter's (or LaTeX) point (1 pt = 1/72.27 in) +:dd: didôt (1 dd = 1238/1157 pt) +:cc: cîcero (1 cc = 12 dd) +:sp: scaled point (1sp = 1/65536pt) + +.. attention:: Different definitions of the unit "pt"! + + * In Docutils (as well as CSS) the unit symbol "pt" denotes the + `Postscript point` or `DTP point`. 
+ + * LaTeX uses "pt" for the `LaTeX point`, which is unknown to Docutils and + 0.3 % smaller. + + * The `DTP point` is available in LaTeX as "bp" (big point): + + 1 pt = 1/72.27 in < 1 bp = 1/72 in + + Lengths specified in the document with unit "pt" will be given the + unit "bp" in the LaTeX source. + + In `raw LaTeX`_ and `custom style sheets`_, the `DTP point` must be + specified as "bp", while "pt" is interpreted as `LaTeX point`. + +The default length unit (added by Docutils to length specifications +without unit) is the "DTP point". + +.. _length units: ../ref/rst/restructuredtext.html#length-units + + +PDF generation +============== + +In most cases, LaTeX code is not the desired end-format of the document. +LaTeX offers many ways to generate PDF documents from the LaTeX +source, including: + +_`pdflatex` + Generates a PDF document directly from the LaTeX file. + Export your document with the _`LaTeX2e writer` (writer + name "``latex``", frontend tool rst2latex.py_). + +_`xelatex` or _`lualatex` + The `XeTeX`_ and LuaTeX_ engines work with input files in UTF-8 encoding + and system fonts. Export your document with the _`XeLaTeX writer` (writer + name "``xetex``", frontend tool rst2xetex.py_). + +You may need to call latex two or three times to get internal references +correct. + +.. _documentoptions: config.html#documentoptions +.. _xetex: http://tug.org/xetex/ +.. _luatex: http://luatex.org/ +.. _rst2latex.py: tools.html#rst2latex-py +.. _rst2xetex.py: tools.html#rst2xetex-py + +_`rubber` + The Rubber__ wrapper for LaTeX and friends can be used to automatically + run all programs the required number of times and delete "spurious" files. + This includes processing bibliographic references or indices, as well as + compilation or conversion of figures. + +__ https://gitlab.com/latex-rubber/rubber/ + + +Configuration +============= + +.. contents:: :local: + +.. 
_option: + +Options/Settings +---------------- + +Options can be specified as + +* command-line options, or + +* configuration settings. + +Run ``rst2latex.py --help`` to get a list of available options; +see `Docutils Configuration`_ for details. + +.. _Docutils Configuration: config.html + +Classes +------- + +The `"classes" attribute`_ is one of the common attributes, shared by all +Docutils elements. +In HTML, the common use is to provide selection criteria for style rules in +CSS stylesheets. As there is no comparable framework for LaTeX, Docutils +emulates some of this behaviour via `Docutils specific LaTeX macros`_. +Due to LaTeX limitations, class arguments are ignored for +some elements (e.g. a rubric_). + +*Inline elements* + are handled via the ``\DUrole{}`` macro that calls the optional styling + command ``\DUrole«classargument»`` with one argument (the role content). + See `custom interpreted text roles`_. + +*Block level elements* + are wrapped in "class environments": + ``\begin{DUclass}`` calls the optional styling command + ``\DUCLASS«classargument»{}``, ``\end{DUclass}`` tries + ``\endDUCLASS«classargument»``. + +Customization is done by defining matching macros or environments. + +Example 1: + Use small caps font inside elements with class value "custom". + + *Inline elements* + The LaTeX function ``\textsc`` sets the argument in small caps:: + + \newcommand{\DUrolecustom}[1]{\textsc{#1}} + + *Block-level elements* + The LaTeX directive (macro without argument) ``\scshape`` switches to + the small caps font. Its effect is confined to the wrapper ``DUclass`` + environment:: + + \newcommand*{\DUCLASScustom}{\scshape} + +Example 2: + It is even possible to locally redefine other LaTeX macros, e.g. to + turn bullet lists with class value "enumerateitems" into enumerated + lists:: + + \newcommand*{\DUCLASSenumerateitems}{% + \renewenvironment{itemize}{\begin{enumerate}}% + {\end{enumerate}}% + } + +.. 
rubric:: Notes + +* Class arguments may contain numbers and hyphens, which need special + treatment in LaTeX command names (see `class directive`_). The commands + ``\csname`` and ``\endcsname`` or the special command ``\@namedef`` can + help with the definition of corresponding macros or environments, e.g.:: + + \expandafter\newcommand\csname gg1\endcsname{Definition of gg1.} + + or :: + + \makeatletter + \@namedef{DUCLASSadmonition-test}{…} + \makeatother + +* Elements can have multiple class arguments. In contrast to HTML/CSS, the + order of the class arguments cannot be ignored in LaTeX + +* Class handling differs for some elements and class values: + + * Class argument values starting with ``align-`` are transformed to + "align" argument values. Class argument values starting with + ``language-`` set the elements language property. + + * The table element recognizes some special class values. See section + `table style`_. + + * If the legacy-class-functions_ setting is True, the special macros + ``\DUadmonition`` and ``\DUtitle`` are written with a comma separated + list of class values as optional argument. + +.. _"classes" attribute: ../ref/doctree.html#classes +.. _legacy-class-functions: config.html#legacy-class-functions + +LaTeX code +---------- + +Custom LaTeX code can be placed in `style sheets`_, the +`LaTeX preamble`_, the document body (`raw LaTeX`_), or custom templates_. + +The functional tests that come with Docutils, can serve as example. + +input: + standalone_rst_latex.txt_ (includes files from `tests/functional/input/data`_) +expected output: + standalone_rst_latex.tex_ + +.. _standalone_rst_latex.txt: + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/input/standalone_rst_latex.txt +.. _tests/functional/input/data: + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/input/data +.. 
_standalone_rst_latex.tex: + https://sourceforge.net/p/docutils/code/HEAD/tree/trunk/docutils/test/functional/expected/standalone_rst_latex.tex + + +.. _style sheet: +.. _custom style sheets: + +Style sheets +```````````` + +A common way of LaTeX customization is the preparation of custom style +sheets, either as simple files with LaTeX code snippets or as home-made +`LaTeX packages`_ (see the clsguide_ for an introduction on LaTeX +package writing). + +Options: + stylesheet_ + + It is possible to specify multiple style sheets and mix `LaTeX + packages`_ with custom style sheets. + +You cannot specify package options with the stylesheet_ setting. If +you need to pass options to the package, use the ``\usepackage`` +command in the `LaTeX preamble`_ or a custom style sheet. + +Example 1: + Select Latin Modern fonts with the `lmodern` package:: + + --stylesheet=lmodern + +Example 2: + Use the `preamble.tex` home-made custom style sheet together with + the package `kerkis` (Bookman fonts):: + + --stylesheet=kerkis,preamble.tex + +Example 3: + Select Palatino fonts with old-style numbers and true small-caps + with the LaTeX command :: + + \usepackage[osf,sc]{mathpazo} + + in the `LaTeX preamble`_ or `custom style sheets`_. + +Stylesheet Repository + There is a `repository of user-contributed style sheets`_ in the + Docutils Sandbox_. + +.. _clsguide: https://mirrors.ctan.org/macros/latex/base/clsguide.pdf +.. _stylesheet: config.html#stylesheet-latex-writers +.. _embed-stylesheet: config.html#embed-stylesheet-latex-writers +.. _repository of user-contributed style sheets: + ../../../sandbox/stylesheets/ +.. _sandbox: ../../../sandbox/ + + +LaTeX preamble +`````````````` + +Configuration by LaTeX code in the document preamble is also possible +without a separate stylesheet. This way, packages can be loaded with +options or commands re-defined without the need to create a separate +file (new in Docutils 0.7). 
+ +Option: + latex-preamble_ + +Default: + used for `font setup`_ + +Example: + To use the better looking ``txtt`` font for monospaced text define the + latex-preamble_ setting in a configuration file:: + + latex-preamble: \renewcommand{\ttdefault}{txtt} + \usepackage{mathptmx} % Times + \usepackage[scaled=.92]{helvet} % Helvetica + +.. _latex-preamble: config.html#latex-preamble +.. _PDF standard fonts: https://en.wikipedia.org/wiki/PDF#Standard_Type_1_Fonts +.. _Linux Libertine: http://www.linuxlibertine.org + + +Templates +````````` + +Some customizations require commands at places other than the insertion +point of stylesheets or depend on the deletion/replacement of parts of the +document. This can be done via a custom template. See the `publisher +documentation`_ for a description of the document parts available in a +template file. + +Option: + template_ + +In addition to the 'default.tex' template, the latex writer directory +contains the alternatives 'titlepage.tex' (separate title page) and +'titlingpage.tex' (separate title page with the `memoir`_ +`document class`_). + +Example: + Print a title page including docinfo, dedication, and abstract:: + + --template=titlepage.tex + +.. _publisher documentation: ../api/publisher.html +.. _template: config.html#template-latex-writers + +Raw LaTeX +````````` + +By means of the `raw directive`_ or a derived `custom role`_, one can +give commands directly to LaTeX. These can be both styling and +printing commands. + +Example: + Math formula:: + + .. raw:: latex + + \[x^3 + 3x^2a + 3xa^2 + a^3,\] + + (Drawback: the formula will be invisible in other output formats. Better + use the `math directive`_.) + +Most LaTeX code examples also work as raw LaTeX inside the document. +An exception are commands that need to be given in the document +preamble (e.g. package loading with ``\usepackage``, which can be +achieved with the ``--stylesheet`` or ``--latex-preamble`` command +line options instead). 
Remember to use *re-defining* commands for +customizing `Docutils specific LaTeX macros`_ with raw LaTeX. + +Example: + Define the transition command as page break:: + + .. raw:: latex + + \renewcommand*{\DUtransition}{\pagebreak[4]} + +See also: + * Defining a macro for a `custom role`_. + * Forcing `page breaks`_. + +.. _raw directive: ../ref/rst/directives.html#raw +.. _math directive: ../ref/rst/directives.html#math + + +How to configure the ... +======================== + +admonitions +----------- + +Admonitions__ are specially marked "topics" that can appear anywhere an +ordinary body element can. + +__ ../ref/rst/directives.html#admonitions + +Environment: + ``DUadmonition`` + + (Command ``\DUadmonition`` with legacy-class-functions_.) + +Default: + Typeset in a frame (90 % of text width). + +The admonition title is typeset with the ``\DUtitle`` command (see `titles`_). + +Example 1: + A lighter layout without the frame:: + + \newenvironment{DUadmonition}% + {\begin{quote}} + {\end{quote}} + +Example 2: + Print all admonitions in the margin:: + + \usepackage{environ} + \NewEnviron{DUadmonition}{\marginpar{\BODY}} + +Example 3: + Use the ``.. note::`` admonition for a margin note:: + + \usepackage{environ} + \newcommand{\DUCLASSnote}{% + \RenewEnviron{DUadmonition}{\marginpar{\BODY}}% + \renewcommand{\DUtitle}[1]{}% suppress title ("Note") + } + +.. caution:: Make sure there is enough space in the margin. + ``\marginpar`` fails in some places or with some content. See also the + environ_ and marginnote_ packages. + +.. _environ: https://ctan.org/pkg/environ +.. _marginnote: https://ctan.org/pkg/marginnote + + +.. _custom role: + +custom interpreted text roles +----------------------------- + +The rst `role directive`_ allows defining custom `text roles`_ that mark +parts of inline text (spans) with class arguments (see section classes_). 
+ +Commands: + ``\DUrole``: dispatcher command + + ``\DUrole«classargument»``: optional styling command with 1 argument (the + role content). + +Default: + The default definition of ``\DUrole{«classargument»}{}`` calls the macro + named ``\DUrole«classargument»{}`` if it is defined and silently ignores + this class argument if not. + +Example 1: + Typeset text in small caps:: + + .. role:: smallcaps + + :smallcaps:`Fourier` transformation + + This is transformed to the LaTeX code:: + + \DUrole{smallcaps}{Fourier} transformation + + The definition :: + + \newcommand{\DUrolesmallcaps}{\textsc} + + as `raw LaTeX`_ or in the custom `style sheet`_ will give the expected + result (if the text font_ supports small caps). + +Example 2: + Subscript text in normal size and *italic* shape:: + + .. role:: sub(subscript) + + As "sub" inherits from the standard "subscript" role, the LaTeX macro + only needs to set the size and shape:: + + \newcommand{\DUrolesub}{\normalsize\itshape} + +Example 3: + A role with several classes and a converted class name:: + + .. role:: custom4 + :class: argI argII arg_3 + + is translated to the nested commands:: + + \DUrole{argi}{\DUrole{argii}{\DUrole{arg-3}{<content>}}} + + With the definitions:: + + \newcommand{\DUroleargi}{\textsc} + \newcommand{\DUroleargii}[1]{{\large #1}} + \makeatletter + \@namedef{DUrolearg-3}{\textbf} + \makeatother + + in a `style sheet`_\ [#]_ or as `raw LaTeX`_ in the document source, + text styled with ``:custom4:`large bold small-caps``` will be typeset + accordingly. + +.. [#] Leave out the ``\makeatletter`` - ``\makeatother`` pair if the style + sheet is a LaTeX package (``*.sty``). + +.. _role directive: ../ref/rst/directives.html#role +.. _text roles: ../ref/rst/roles.html +.. _class directive: ../ref/rst/directives.html#class + +definition lists +---------------- + +ReStructuredText `definition lists`__ correspond to HTML ``<dl>`` list +objects. 
+ +Environment: + ``description``: LaTeX standard environment + +Command: + ``\descriptionlabel``: styling macro for the description term + +Default: + bold label text, hanging indent + +Example: + A non-bold label can be achieved with:: + + \renewcommand\descriptionlabel[1]{\hspace\labelsep \normalfont #1} + +__ ../ref/rst/restructuredtext.html#definition-lists + + +document class +-------------- + +There are hundreds of LaTeX document classes installed by modern +LaTeX distributions, provided by publishers, or available at CTAN_. + +Popular document classes: + * article, report, book: standard document classes + * scrartcl, scrrprt, scrbook: KOMA-script_ classes + * memoir_: highly configurable class for larger documents + +Option: + documentclass_ + +.. _KOMA-script: https://ctan.org/pkg/koma-script +.. _memoir: https://ctan.org/pkg/memoir +.. _documentclass: config.html#documentclass + + +document info +------------- + +Content of the `bibliographic fields`__ at the top of a document. +By default, docinfo items are typeset as a table. + +Options: + use-latex-docinfo_, use-latex-abstract_ + +Length: + ``\DUdocinfowidth``: the width for the `docinfo` table. + +Default: + 90 % of text width: ``0.9\textwidth`` + +Example: + set to 70 % of text width:: + + \newlength{\DUdocinfowidth} + \setlength{\DUdocinfowidth}{0.7\textwidth} + +__ ../ref/rst/restructuredtext.html#bibliographic-fields + +.. _use-latex-docinfo: config.html#use-latex-docinfo +.. _use-latex-abstract: config.html#use-latex-abstract + + +document title +-------------- + +A lone top-level section title is (usually) transformed to the document title +(see `section structure`_). + +The format of the document title is defined by the `document class`_. The +"article" document class uses an in-page title and the "report" and "book" +classes write a separate title page. See the `TeX FAQ`_ on how to customize +the `style of document titles`_. 
+ +The default title page shows only title and subtitle; date and author +are shown in the `document info`_ table. + +Options: + use-latex-docinfo_ + + ``--template=titlepage.tex`` Put docinfo and abstract into the title page. + A separate title page is used also with the "abstract" document class. + +.. _section structure: rst/quickref.html#section-structure +.. _TeX FAQ: http://www.tex.ac.uk/faq +.. _style of document titles: + http://www.tex.ac.uk/cgi-bin/texfaq2html?label=titlsty + + +field lists +----------- + +`Field lists`__ may be used as generic two-column table constructs in +documents. + +Environment: + ``DUfieldlist`` + +Default: + Indented description list. + +Example: + Use a description list customized with enumitem_:: + + \usepackage{enumitem} + \newenvironment{DUfieldlist}% + {\description[font=,style=sameline,leftmargin=8em]} + {\enddescription} + + + The `KOMA-script`_ classes provide a similar environment under the name + `labeling`. + +.. _enumitem: https://ctan.org/pkg/enumitem +__ ../ref/rst/restructuredtext.html#field-lists + + +figure and table captions +------------------------- + +The caption_ package provides many ways to customise the captions in +floating environments like figure and table. + +The chngcntr_ package helps to configure the numbering of figure and table +captions. + +Some document classes (e.g. KOMA-script_) provide additional configuration. +Also see the related `LaTeX FAQ entry`__. + +Example + :: + + \usepackage{caption} + \captionsetup{justification=raggedleft,singlelinecheck=false} + +.. _caption: https://ctan.org/pkg/caption +.. _chngcntr: https://ctan.org/pkg/chngcntr +__ http://www.tex.ac.uk/cgi-bin/texfaq2html?label=running-nos + + +figure placement +---------------- + +Figures_ might be typeset at the place of definition (default) or "float" +to a suitable place at the top or bottom of a page. This is implemented +using the float_ package. 
+ +Command: + ``\floatplacement`` + +The placement setting is valid from the point of definition until the next +``\floatplacement`` command or the end of the document. See float.pdf_ for +details. + +Default: + ``\floatplacement{figure}{H}`` (here definitely). This corresponds most + closely to the source and HTML placement (principle of least surprise). + +Example 1: + In a custom `style sheet`_, set the default to let LaTeX find a suitable + place for figure floats:: + + \usepackage{float} + \floatplacement{figure}{htbp} % here, top, bottom, extra-page + +Example 2: + To move all following figures to the top or bottom of the page write in + the document source:: + + .. raw:: latex + + \floatplacement{figure}{tb} + +.. _figures: ../ref/rst/directives.html#figure +.. _float: https://ctan.org/pkg/float +.. _float.pdf: https://mirrors.ctan.org/macros/latex/contrib/float/float.pdf + + +.. _font setup: + +font +---- + +The selected text font influences the *look*, the *feel*, +and the *readability* of the document (cf. +http://www.csarven.ca/web-typography). +Selecting a suitable font also solves the problem with `bad looking +PDF output`_. + +Font selection is one of the main differences between LaTeX and XeTeX/LuaTeX: + +LaTeX + cannot use the fonts of the operating system directly but needs + specially installed fonts with additional supporting files. + +XeTeX/LuaTeX + can use system fonts and provides access to the full feature set of + modern OpenType_ fonts. + +.. _OpenType: https://en.wikipedia.org/wiki/OpenType + +The default font setup is done in the latex-preamble_: + +LaTeX + `PDF standard fonts`_ (Times, Helvetica, Courier) + +XeTeX/LuaTeX + `Linux Libertine`_, a free, high quality alternative to Times with a + wide coverage of glyphs, styles, and OpenType features. + + Despite its name, Linux Libertine can be used on any operating + system that can handle OpenType fonts. 
+ +Alternative fonts can be selected by + +LaTeX + a) specifying the corresponding LaTeX package(s) as argument to the + stylesheet_ option_ or with the ``\usepackage`` LaTeX command. + + * packages can be combined, + * passing options to a package is only possible in a `style sheet`_ + or the `LaTeX preamble`_. + + b) changing the font-default macros ``\rmdefault``, ``\sfdefault`` + and/or ``\ttdefault`` in a custom `style sheet`_, the `LaTeX + preamble`_ or `raw LaTeX`_. + + Example 1: + Use `Latin Modern`_. `LaTeX code`_:: + + \usepackage{lmodern} + + Command line argument:: + + --stylesheet=lmodern + + Example 2: + The _`Times/Helvetica/Courier` `PDF standard fonts`_ are + selected by the LaTeX code [#]_:: + + \usepackage{mathptmx} % Times for serif and math + \usepackage[scaled=.90]{helvet} % downscaled Helvetica for sans serif + \usepackage{courier} % Courier for teletype (mono-space) + + Since Docutils 0.7, this is the default value of the + `latex-preamble`_ option. + + .. [#] When generating PDF-files from LaTeX, the `PDF standard + fonts`_ do not need to be embedded in the document. While this + results in smaller files, the actually used fonts on screen and in + print might differ! (For details see, e.g., the testflow_ package + documentation.) + + + Example 3: + Use the teletype font from the txfonts_ package. As there is no + package for this, we re-define the font macro with the `LaTeX code`_:: + + \renewcommand{\ttdefault}{txtt} + + +XeTeX/LuaTeX + using the macros of the fontspec_ package. Use some font-viewer or + -manager (e.g. fontmatrix_) to find out the correct names of the + fonts on your system. + + Example: + DejaVu_, very wide coverage, screen optimized. As this font + runs wide, add ``DIV=10`` to the `documentoptions`_:: + + \setmainfont{DejaVu Serif} + \setsansfont{DejaVu Sans} + \setmonofont[HyphenChar=None]{DejaVu Sans Mono} + +.. _fontspec: https://ctan.org/pkg/fontspec +.. _fontmatrix: http://fontmatrix.net/ +.. 
_DejaVu: http://dejavu-fonts.org/ +.. _documentoptions: config.html#documentoptions + +choice of suitable fonts +```````````````````````` + +High quality free fonts suitable for use with XeTeX/LuaTeX are, e.g., listed +at `Good Libre Fonts`_, `25 Best Free Quality Fonts`_ and the update +`19 More Free Quality Fonts`_. + +The `LaTeX Font Catalogue`_ provides information and examples for a wide +range of fonts available for use with LaTeX. Here is just a selection: + +a) The `Latin Modern`_ (LM) fonts are extended outline versions of the + standard TeX font Computer Modern (CM). + + +1 simple invocation: ``--stylesheet=lmodern`` + + +1 keeps the traditional TeX "look and feel": + + +1 generally accepted as high quality CM replacement, + +1 comprehensive math support, + +1 including optical sizes, + +1 compatible with extensions made to match CM, + -1 modern types are hard to read at low (screen) resolutions. + + -1 not part of a minimal standard TeX installation + +b) CM-Super_ is another outline CM replacement. + + +1 simple invocation: modern LaTeX distributions use CM-Super + automatically instead of CM if it is installed. + + -1 said to be of inferior quality compared to LM. + + -1 not part of a minimal standard TeX installation, + bigger download size than Latin Modern (64 MB). + +c) `Bera`_ (Bitstream Vera) + + +1 simple invocation: ``--stylesheet=bera`` + + +1 optimized for on-screen viewing with good hinting + + -1 not part of a minimal standard TeX installation + +d) PSNFSS_ Postscript fonts + + +1 part of every standard TeX installation + + +1 smaller PDF/Postscript document size if standard fonts are not + embedded + + -1 restricted set of glyphs in the free versions [#]_ + + -1 different fonts for roman, sans-serif and typewriter fonts. + + -1 invocation somewhat more complex, as several packages are + required for a complete font set, sometimes including package + options. + + Roman (serif) PSNFSS fonts: + + Bookman + good legibility but very wide. 
+ + Charter + bread-and-butter type optimized for printing on low-resolution + printers + + New Century Schoolbook + good legibility but very wide. + + Palatino + +1 recommended by font experts + +1 good LaTeX support including matching math fonts, small caps, + old-style figures + -1 bad rendering in xpdf viewer (auto-hinting leads to different + x-hight for different characters at some magnifications) + (this is fixed in recent versions). + + Times + +1 the serif `PDF Standard Font`_, + -1 overused and quite narrow (devised for multi-column layouts). + + Utopia + recommended by font experts + + + .. table:: Font packages for standard Postscript fonts + (cf. `Using common Postscript fonts with LaTeX`_) + + ========= ============ ============= ============= ========= + Package Roman Sans Serif Typewriter Math + ========= ============ ============= ============= ========= + (none) CM Roman CM Sans Serif CM Typewriter CM Math + + mathpazo Palatino Palatino + + mathptmx Times Times + + helvet Helvetica + + avant Avant Garde + + courier Courier + + chancery Zapf + Chancery + + bookman Bookman Avant Garde Courier + + newcent New Century Avant Garde Courier + Schoolbook + + charter Charter + + utopia Utopia + + fourier Utopia Fourier + ========= ============ ============= ============= ========= + +.. [#] Extended versions of the standard Postscript fonts including + accented chars, as well as real small-caps + and old-style numbers are available with the `TeX Gyre`_ bundle + which is part of, e.g., `TeX Live`_. + + +.. _LaTeX Font Catalogue: http://www.tug.dk/FontCatalogue/ +.. _Latin Modern: https://ctan.org/pkg/lm +.. _CM-Super: https://ctan.org/pkg/cm-super +.. _bera: https://ctan.org/pkg/bera +.. _TeX Gyre: http://www.gust.org.pl/projects/e-foundry/tex-gyre +.. _PSNFSS: https://ctan.org/pkg/psnfss +.. _Using common PostScript fonts with LaTeX: + https://mirrors.ctan.org/macros/latex/required/psnfss/psnfss2e.pdf +.. _TeX Live: http://tug.org/texlive/ +.. 
_txfonts: https://ctan.org/pkg/txfonts +.. _PDF Standard Font: + https://en.wikipedia.org/wiki/PDF#Standard_Type_1_Fonts +.. _testflow: + http://www.tex.ac.uk/tex-archive/help/Catalogue/entries/testflow.html +.. _Good Libre Fonts: http://typophile.com/node/18207 +.. _25 Best Free Quality Fonts: + http://www.alvit.de/blog/article/20-best-license-free-official-fonts +.. _19 More Free Quality Fonts: + http://www.smashingmagazine.com/2006/10/11/17-more-free-quality-fonts/ + + +font encoding +------------- + +LaTeX font encodings are described in detail in the encguide_ which is +part of the LaTeX base documentation. + +Option: + font-encoding_ + +Default: + "T1" + +Example 1: + Use the (obsolete) LaTeX default encoding "OT1":: + + --font-encoding=OT1 + + or (without loading the fontenc_ package):: + + --font-encoding="" + + This will improve the look on screen with the default Computer Modern + fonts at the expense of problems with `search and text extraction`_ + The recommended way is to select a T1-encoded "Type 1" (vector) + font, for example `Latin Modern`_ + +Example 2: + Support for characters in the Unicode blocks Latin, Latin-1 Supplement, + and Greek together with a T1-encoded "Type 1" (vector) font, for example + `Latin Modern`_:: + + --font-encoding=LGR,T1 --stylesheet=lmodern + +.. _encguide: https://mirrors.ctan.org/macros/latex/base/encguide.pdf +.. _font-encoding: config.html#font-encoding +.. _fontenc: https://ctan.org/pkg/fontenc + + +font size +--------- + +Add font size in points to the document options, e.g. +``--documentoptions=12``, use e.g. the document classes provided by +extsizes_ for values other than [10,11,12]. + +.. _extsizes: https://ctan.org/pkg/extsizes + + +footnotes +--------- + +By default, footnotes are set with Docutils-specific wrappers around +the standard ``\footnotemark`` and ``\footnotetext`` commands. 
You +can configure the footnote layout similar to standard LaTeX footnotes +in a custom `style sheet`_ or the `LaTeX preamble`_. + +Further configuration is possible by alternative definitions of +``\DUfootnotemark`` and ``\DUfootnotetext`` + +Example 1: + Set footnote text with a hanging indent. + + * This is the default with KOMA-script_ classes, e.g:: + + --documentclass=scrartcl + + (for further configuration, see the `KOMA-script Guide`_), + + * with package footmisc_:: + + \usepackage[hang]{footmisc} + \setlength{\footnotemargin}{0em} + + (play with the ``\footnotemargin`` setting), + + * redefine ``\DUfootnotetext`` inserting `\hangindent`:: + + \newcommand{\DUfootnotetext}[4]{% + \begingroup% + \renewcommand{\thefootnote}{% + \protect\raisebox{1em}{\protect\hypertarget{#1}{}}% + \protect\hyperlink{#2}{#3}}% + \footnotetext{\hangindent=2em #4}% + \endgroup% + } + + (adapt the ``\hangindent`` value). + +Example 2: + Footnote marks in normal font size, not superscript:: + + \usepackage{scrextend} % not required with KOMA-script document classes + \deffootnote{1em}{1em}{\thefootnotemark\ } + + (See the `KOMA-script Guide`_ for details and other options.) + +Example 3: + Place the footnote text where it appears in the source document (instead + of at the page bottom). This can be used to get the effect of endnotes + (needs the hanging_ package):: + + \usepackage{hanging} + \newcommand{\DUfootnotetext}[4]{% + \par\noindent\raisebox{1em}{\hypertarget{#1}{}}% + \hyperlink{#2}{#3}% + \hangpara{\parindent}{1}#4% + } + +.. _footmisc: https://ctan.org/pkg/footmisc +.. _hanging: https://ctan.org/pkg/hanging + + +hyphenation +----------- + +The amount of hyphenation is influenced by ``\hyphenpenalty``, setting it to +10000 almost prevents hyphenation. As this produces lines with more space +between words one should increase Latex's ``\tolerance`` for this. 
+ +Example: + :: + + \hyphenpenalty=5000 + \tolerance=1000 + + +hyperlinks +---------- + +Options: + hyperlink-color_, hyperref-options_ + +Hyperlinks are realized using the hyperref_ package. As it re-defines many +standard LaTeX macros, this package is loaded last, *after* the style +sheets. + +However, you can load hyperref before a package that requires its +presence in a `style sheet`_ or the `LaTeX preamble`_ (see example +below). This will ignore options set with hyperlink-color_ and +hyperref-options_. + +URLs are typeset with the "url" package (loaded implicitly by "hyperref"). +The font of URLs can be defined with the ``\urlstyle`` command. Valid +arguments are + +:same: normal text font, Docutils default, +:tt: teletype (monospaced), LaTeX default, +:rm: roman, +:sf: sans serif + +Example: + Custom loading of the hyperref package also switches to + the LaTeX default (monospaced fonts for URLs). Reset to use the text + font:: + + \usepackage[unicode,colorlinks=true,linkcolor=green]{hyperref} + \urlstyle{same} + +See also `non-breaking hyperlinks`_. + +.. _hyperlink-color: config.html#hyperlink-color +.. _hyperref-options: config.html#hyperref-options + + +disable hyperlinks +`````````````````` + +To suppress the hyper-linking completely (e.g. for printing or to +avoid clashes with other packages), set hyperref-options_ to "draft" +or load the "nohyperref" package that comes with the "hyperref" +bundle. + +Option: + ``--hyperref-options=draft`` + +`LaTeX code`_:: + + \usepackage{nohyperref,url} + \urlstyle{same} + +.. _hyperref: https://ctan.org/pkg/hyperref + + +language +-------- + +The global document language can be set with the language-code_ +configuration setting. The language of text parts can be set adding the +language tag prefixed by "language-" to an element's classes_ +attribute, e.g. ``language-el`` for a Greek text part. + +.. 
_language-code: config.html#language-code + + +line blocks +----------- + +In `line blocks`__, newlines and leading whitespace are respected. + +Environment: + ``DUlineblock``: special list environment for line blocks + +Length: + ``\DUlineblockindent``: indentation of indented lineblock parts. + +Default: + 2.5 times the font height: ``2.5em`` + +Example: + set to the paragraph indentation:: + + \newlength{\DUlineblockindent} + \setlength{\DUlineblockindent}{\parindent} + +__ ../ref/rst/restructuredtext.html#line-blocks + + +line spacing +------------ + +Commands: + ``\linespread``: for small adjustments + + ``\singlespacing``, ``\onehalfspacing``, and ``\doublespacing``: from + package `setspace` + +Example 1: + Get document wide double spacing:: + + \usepackage{setspace} + \doublespacing + +Example 2: + Increase line spacing by five percent for better readability:: + + \linespread{1.05} + + +literal blocks +-------------- + +No markup processing is done within a `literal block`__. It is left as-is, +and is typically rendered in a monospaced typeface + +Option: + literal-block-env_ + +Example: + + ``--literal-block-env=lstlisting`` + + The ``lstlisting`` environment is highly configurable (as documented in + listings.pdf_) and provides syntax highlight for many programming languages, + for instance :: + + \renewcommand{\ttdefault}{txtt} + \lstset{language=Python, morekeywords=[1]{yield}} + \lstloadlanguages{Python} + \lstset{ + basicstyle=\ttfamily, + keywordstyle=\bfseries, + commentstyle=\rmfamily\itshape, + stringstyle=\slshape, + } + \lstset{showstringspaces=false} + \lstset{columns=fullflexible, + basewidth={0.5em,0.4em}} + + and to get LaTeX syntax highlight for a code block with "listings":: + + \lstloadlanguages{[LaTeX]TeX} % comma separated list of languages + \newcommand{\DUCLASSlatex}{\lstset{language=[LaTeX]TeX}} + + The indentation of literal blocks can be reset with :: + + \lstset{resetmargins=true} + + and/or configured with e. 
g.:: + + \lstset{xleftmargin=-2em} + +__ ../ref/rst/restructuredtext.html#literal-blocks +.. _literal-block-env: config.html#literal-block-env +.. _listings.pdf: + https://mirrors.ctan.org/macros/latex/contrib/listings/listings.pdf + + +lists +----- + +Remove extra vertical whitespace between items of bullet lists and +enumerated lists. + +Example: + Pass the class argument "compact" to the list:: + + .. class:: compact + + * first item + * second item + + The following lines for the `LaTeX preamble`_ use the enumitem_ package to + remove spacing from all lists with class argument "compact":: + + \usepackage{enumitem} + \newcommand*{\DUCLASScompact}{\setlist{noitemsep}} + + +list of figures/tables +---------------------- + +Docutils does not support lists of figures or tables. + +However, with LaTeX, they can be generated using `raw LaTeX`_ in the +document source. + +Commands: + ``\listoffigures``: a list of figures + + ``\listoftables``: a list of tables + +Example: + :: + + .. raw:: latex + + \listoffigures + + +option list +----------- + +`Option lists`__ are two-column lists of command-line options and +descriptions, documenting a program's options. + +Environment: + ``DUoptionlist``: environment for option lists + +Command: + ``\DUoptionlistlabel``: set appearance of the options + +Example: + set command options with a bold monospace font:: + + \newcommand{\DUoptionlistlabel}[1]{\texttt{\textbf{#1}} \hfill} + +__ ../ref/rst/restructuredtext.html#option-lists + + +page breaks +----------- + +* Page breaks before top-level sections are the default with a + documentclass that provides "chapters", e.g. "book", "memoir" or + "scrbook". + +* Redefining the ``\section`` or ``\section*`` command in a + style sheet is possible too. + +* `Raw LaTeX`_ or a `custom role`_ can be used. 
+ +* The transition element can be re-defined to produce a page break, + +Commands + ``\newpage``: hard pagebreak at exactly this position + + ``\pagebreak[2]``: recommended page break after line end (precedence 1...4) + +Example: + Define the transition command as page break with the `LaTeX code`_:: + + \newcommand*{\DUtransition}{\pagebreak[4]} + + (use ``\renewcommand`` with `raw LaTeX`_). + +page layout +----------- + +By default, paper size and margin settings are determined by the document +class. + +The following packages help to configure the page layout: + +a) The `typearea`_ package (part of the `KOMA-script`_ bundle) calculates a + *good* page layout (based on rules and recommendations of typography + experts). + + See the `KOMA-Script Guide`_ for details on what is a *good* layout and + how this is achieved. + +b) The `geometry`_ package is recommended if you have to follow guidelines + with fixed values for the margins. + For details see the `geometry manual`_. + +Example 1: + Let `typearea` determine the type area with ``DIV=calc`` in the + documentoptions:: + + --documentoptions='a4paper,DIV=calc' + + The ``DIV`` option can also be specified, like ``DIV=10``. It defines how + "crowded" a page will be: larger values mean larger text area (at the + expense of readability). + +Example 2: + `LaTeX code`_ to set margins with the geometry_ package:: + + \usepackage{geometry} + \geometry{hmargin={3cm,0.8in},height=8in} + \geometry{height=10in}. + +.. _typearea: https://ctan.org/pkg/typearea +.. _geometry: https://ctan.org/pkg/geometry +.. _KOMA-Script Guide: + https://mirrors.ctan.org/macros/latex/contrib/koma-script/doc/scrguien.pdf +.. _geometry manual: + https://mirrors.ctan.org/macros/latex/contrib/geometry/geometry.pdf + + +page headers and footers +------------------------ + +With the fancyhdr_ package or the `KOMA-script`_ classes, you can define +custom page head- and foot-lines. 
+ +The `"header" and "footer" directives`_ save their content in the macros +``\DUheader`` resp. ``\DUfooter``. The macros can be used in LaTeX code and +will be replaced by LaTeX with the content of the directives. + +Example: + `LaTeX code`_ to place left-aligned "header" and "footer" on every + page with fancyhdr_:: + + \usepackage{fancyhdr} + \fancyhead[L]{\DUheader} + \fancyfoot{} % reset + \fancyfoot[L]{\DUfooter} + \pagestyle{fancy} + + +.. _fancyhdr: http://www.ctan.org/pkg/fancyhdr +.. _"header" and "footer" directives: ../ref/rst/directives.html#header + + +page numbering +-------------- + +Example: + Number pages by chapter (using the chappg_ package):: + + \usepackage{chappg} + + See the `chappg documentation`_ for details. + +.. _chappg: https://ctan.org/pkg/chappg +.. _chappg documentation: + https://mirrors.ctan.org/macros/latex/contrib/chappg/chappg.pdf + + +paper size +---------- + +Paper geometry can be changed using ``--documentoptions`` or with the +`geometry`_ package. + +`LaTeX code`_:: + + \usepackage{geometry} + \geometry{OPTIONLIST} + +Default: + a4paper + +Some possibilities: + +* a4paper, b3paper, letterpaper, executivepaper, legalpaper +* landscape, portrait, twoside. + +Example: + Choose A5 paper in landscape orientation with command line argument:: + + --documentoptions=a5paper,landscape + + The same with LaTeX commands in the `style sheet`_:: + + \usepackage{geometry} + \geometry{a5paper,landscape} + + For details see the `geometry manual`_. + +paragraph indent +---------------- + +Default (in most document classes): + Indent the first line in a paragraph unless it is the first line of a + chapter, section, subsection, or subsubsection. 
+ +Example 1: + To set paragraph indentation to zero but add a vertical space between + load the `parskip` package with the command line argument:: + + --stylesheet=parskip + + or in a custom `style sheet`_ with:: + + \usepackage{parskip} + +Example 2: + To suppress the indentation of a specific paragraph, you may give it the + class "noindent" with, e.g. :: + + .. class:: noindent + + This paragraph should not be indented. + + and define the `custom role`_ command:: + + \newcommand{\DUrolenoindent}[1]{\noindent #1} + +rubric +------ + +A rubric__ is like an informal heading that doesn't correspond to the +document's structure. + +Command: + ``\DUrubric`` + +Default: + subsubsection style (unnumbered), italic + +Example1: + Set centred and red:: + + \newcommand*{\DUrubric}[1]{% + \subsubsection*{\centerline{\color{red}#1}}} + +.. note:: + Class attribute values are ignored because the "classes_ wrapper" + interferes with LaTeX's formatting (spacing/indentation) of text following + a section heading. Consider using a `topic element`_ or a container_. + +__ ../ref/rst/directives.html#rubric +.. _container: ../ref/rst/directives.html#container + +section headings +---------------- + +Options: documentclass_, use-part-section_ + +Section headings are converted into LaTeX macros according to their level, +the document class and the value of the use-part-section_ setting: + +===== ============= ================== ============= ============== +Level article article with part book [#]_ book with part +===== ============= ================== ============= ============== + 1 section part chapter part + 2 subsection section section chapter + 3 subsubsection subsection subsection section + 4 paragraph subsubsection subsubsection subsection + 5 subparagraph paragraph paragraph subsubsection + 6 DUtitle subparagraph subparagraph paragraph + 7 DUtitle DUtitle DUtitle subparagraph +===== ============= ================== ============= ============== + + +.. 
[#] One of the document classes 'book', 'memoir', 'report', 'scrbook', + or 'scrreprt'. + +.. _use-part-section: config.html#use-part-section + +section numbering +----------------- + +Sections are numbered if there is a `sectnum directive`_ in the document. + +Option: sectnum_xform_ + ``--section-numbering``, ``--no-section-numbering`` + +If sectnum_xform_ is False, section numbers are generated by LaTeX. In this +case the "prefix" and "suffix" arguments of the `sectnum directive`_ are +ignored. The section number style is determined by the `document class`_ +and can be configured in a LaTeX `style sheet`_, e.g.:: + + \setcounter{secnumdepth}{5} + +.. note:: The LaTeX name is 'secnumdepth' (without 't'). + +.. _sectnum directive: ../ref/rst/directives.html#sectnum +.. _sectnum_xform: config.html#sectnum-xform + + +sidebar +------- + +Sidebars__ are like miniature, parallel documents that occur inside other +documents, providing related or reference material. They can be likened to +super-footnotes; their content is outside of the flow of the document's main +text. + +Command: + ``\DUsidebar`` + +Default: + Box with grey background. + +Example: + Use margin notes:: + + \newcommand{\DUsidebar}[1]{\marginpar{\flushleft #1}} + + * Make sure the margin is wide enough to hold the note. + * This fails with some constructs inside the `side bar` and where + \marginpar cannot be used, e.g., inside floats, footnotes, or in frames + made with the framed package (see marginnote_). + +__ https://docutils.sourceforge.io/docutils/docs/ref/rst/directives.html#sidebar + +size of a pixel +--------------- + +The *physical size* of a pixel depends on the resolution of the output +device and is usually specified in *dots per inch* (DPI). + +The *length unit* "px" is defined by the output format. For LaTeX, it is +`defined in pdfTeX and LuaTeX`__ (the `xetex` writer emulates this +definition). + +Default: + 72 DPI, i.e. 1 px = 1/72 in. 
[#]_ + +Example: + Set the value to match the CSS definition + with the `LaTeX code`_:: + + \pdfpxdimen=1in + \divide\pdfpxdimen by 96 % 1/96 inch + +.. [#] The `CSS length unit ``px```_ defaults to 1/96 inch. + +__ https://tex.stackexchange.com/questions/41370/ + what-are-the-possible-dimensions-sizes-units-latex-understands +.. _CSS length unit ``px``: https://www.w3.org/TR/css-values-3/#px +.. _reference pixel: https://www.w3.org/TR/css-values-3/#reference-pixel + +table style +------------ + +A pre-configured *table style* can be globally selected via the table_style_ +setting or set for individual tables via a `class directive`_ or the class +option of the `table directive`_. + +Supported values: + +standard + Borders around all cells. + +booktabs + A line above and below the table and one after the head. + +borderless + No borders. + +colwidths-auto + Column width determination by LaTeX. + Overridden by the `table directive`_'s "widths" option. + + .. warning:: + + ``colwidths-auto`` is only suited for tables with simple cell content. + + LaTeX puts the content of auto-sized columns on one line (merging + paragraphs) and may fail with complex content. + +.. eventually in future + + align-left, align-center, align-right + Align tables. + +By default, *column widths* are computed from the source column widths. +The `legacy_column_widths`_ setting selects the conversion algorithm. +Custom column widths can be set with the "widths" option of the `table +directive`_. + +See also the section on problems with tables_ below. + +.. _new_column_widths: +.. _legacy_column_widths: config.html#legacy-column-widths +.. _table_style: config.html#table-style-latex-writers +.. _"widths" option: +.. _table directive: ../ref/rst/directives.html#table + + +table of contents +----------------- + +A `contents directive`_ is replaced by a table of contents (ToC). 
+ +Option: use-latex-toc_ + ``--use-latex-toc``, ``--use-docutils-toc`` + +With use-latex-toc (default since release 0.6): + +* The ToC is generated by LaTeX (via the ``\tableofcontents`` command). + + The layout depends on the chosen document class and can be configured in + a custom `style sheet`_ (see e.g. the `KOMA-Script Guide`_ for the + `KOMA-script`_ classes). + +* The depth of the ToC and PDF-bookmarks can be configured + + + with the "depth" argument of the `contents directive`_, or + + + in a style sheet with e.g. ``\setcounter{tocdepth}{5}``. + +* Local ToCs are done with the minitoc_ package. See the `minitoc + documentation`_ for the numerous configuration options. + +.. note:: + Minitoc supports local ToCs only at "part" and top section level + ("chapter" or "section"). Local `contents` directives at lower levels + are ignored (a warning is issued). + + This is an intended feature of the minitoc_ package. If you really + require local ToCs at lower level, turn off the use-latex-toc_ option. + +.. _use-latex-toc: config.html#use-latex-toc +.. _contents directive: ../ref/rst/directives.html#contents +.. _minitoc: https://ctan.org/pkg/minitoc +.. _minitoc documentation: + https://mirrors.ctan.org/macros/latex/contrib/minitoc/minitoc.pdf + + +title reference role +-------------------- + +`Title reference`_ is the default `default role`_ for `interpreted text`_. + +Command: + ``\DUroletitlereference`` + +Default: + use slanted font (``\textsl``) + +Example: + set title references with a bold monospace font:: + + \newcommand{\DUroletitlereference}[1]{\texttt{\textbf{#1}}} + +.. _title reference: ../ref/rst/roles.html#title-reference +.. _default role: + ../ref/rst/directives.html#setting-the-default-interpreted-text-role +.. _interpreted text: ../ref/rst/restructuredtext.html#interpreted-text + + +titles +------ + +The titles of admonitions_, sidebar_, and `topic element`_ use +the ``\DUtitle`` command. 
+ +Example 1: + a centered and somewhat larger title for topics:: + + \newcommand*{\DUCLASStopic}{ + \renewcommand*{\DUtitle}[1]{\subsection*{\centering #1}} + } + +Example 2: + a right-pointing hand as title for the "attention" directive:: + + \usepackage{pifont} + \newcommand*{\DUCLASSattention}{ + \renewcommand*{\DUtitle}[1]{\ding{43}} + } + + The title argument is "swallowed" by the command. + To have both, hand and title use:: + + \usepackage{pifont} + \newcommand*{\DUCLASSattention}{ + \newcommand*{\DUtitle}[1]{\ding{43} #1} + } + + +text encoding +------------- + +The encoding of the LaTeX source file is Docutils' *output* encoding +but LaTeX' *input* encoding. + +Option: output-encoding_ + ``--output-encoding=OUTPUT-ENCODING`` + +Default: + "utf-8" + +Example: + Encode the LaTeX source file with the ISO `latin-1` (west european) + 8-bit encoding (the default in Docutils versions up to 0.6.):: + + --output-encoding=latin-1 + +Note: + 8-bit LaTeX comes with two options for UTF-8 support, + + :utf8: by the standard `inputenc`_ package with only limited coverage + (mainly accented characters). + + :utf8x: supported by the `ucs`_ package covers a wider range of Unicode + characters than does "utf8". It is, however, a non-standard + extension and no longer developed. + + Currently, the "latex2e" writer inserts ``\usepackage[utf8]{inputenc}`` + into the LaTeX source if it is UTF-8 encoded. + +.. with utf8x: + If LaTeX issues a Warning about unloaded/unknown characters adding :: + + \PreloadUnicodePage{n} + + (where *n* is the Unicode page-number) to the style sheet might help. + +.. _LaTeX Unicode: http://www.unruh.de/DniQ/latex/unicode/ +.. _output-encoding: config.html#output-encoding +.. _inputenc: https://ctan.org/pkg/inputenc +.. _ucs: https://ctan.org/pkg/unicode + + +topic element +------------- + +A topic_ is like a block quote with a title, or a self-contained section +with no subsections. 
Topics and rubrics can be used at places where a +`section title`_ is not allowed (e.g. inside a directive). + +Example: + Use a standard paragraph for a topic:: + + \newcommand{\DUCLASStopic}{% + \renewenvironment{quote}{}{}% + } + +.. _topic: ../ref/rst/directives.html#topic +.. _section title: ../ref/rst/restructuredtext.html#sections + + +transition element +------------------ + +Transitions__ are commonly seen in novels and short fiction, as a gap +spanning one or more lines, marking text divisions or signaling changes in +subject, time, point of view, or emphasis. + +Command: + ``\DUtransition`` + +Default: + A horizontal line, 1/3 of text width + +Example 1: + Use three stars:: + + \newcommand*{\DUtransition}{\centering{}*\quad*\quad*} + + Alternatively use the more elaborated version in `transition-stars.sty`_. + +Example 2: + If paragraphs are separated by indentation, you can simply use a vertical + space:: + + \newcommand*{\DUtransition}{\vspace{2ex}} + +__ https://docutils.sourceforge.io/docutils/docs/ref/rst/restructuredtext.html#transitions +.. _transition-stars.sty: ../../../sandbox/stylesheets/transition-stars.sty + + +Changes +======= + +* The Docutils HISTORY_ lists all changes during the history of docutils. + Important changes are summarized in the RELEASE-NOTES_. + +.. _HISTORY: ../../HISTORY.html +.. _RELEASE-NOTES: ../../RELEASE-NOTES.html + + +Problems +======== + +Troubleshooting +--------------- + +Bad looking PDF output +`````````````````````` + + What I am looking for when I try Docutils is if the PDF files I can get + are of high quality. Unfortunately that never is the case. + + So am I just stupid or is there a way to get really high quality pdf from + Docutils? + +Make sure the default font is not a bitmap font. + +There is `Latin Modern`_ if you like the look of the standard font on paper, +but want nice pdf. Or select something else like Times, Palatino, ... via +configuration `options/settings`_. See font_ and font-encoding_. 
+ + +footnote mark and text at different pages +````````````````````````````````````````` + +Docutils stores the footnote text in a separate node, at the position where +it is specified in the input document. With the default settings, the +footnote is put at the bottom of the page where the footnote text is located, +maybe far away from the footnote mark (see e.g. `<rst/demo.txt>`_). + +To get footnote mark and text at the same page, keep footnote mark and +footnote text close together. + + +non-breaking hyperlinks +``````````````````````` + +If you convert with ``latex`` (as opposed to ``pdflatex``), hyperlinks will +not wrap and sometimes stick into the margin. + +Wrong: + :: + + \usepackage[breaklinks=true]{hyperref} + + "breaklinks" is an internal option that indicates whether the chosen + driver can handle split links. (It might work to *disable* link breaking.) + +Right: + Use one of the following: + + a) compile with pdflatex_, + + b) use the package breakurl_, + + c) (for printout) `disable hyperlinks`_ using the package "nohyperref". + +See also the `Link text doesn’t break at end line`_ LaTeX FAQ entry. + +.. _breakurl: https://ctan.org/pkg/breakurl +.. _Link text doesn’t break at end line: + http://www.tex.ac.uk/cgi-bin/texfaq2html?label=breaklinks + + +Glyph not defined in PD1 encoding +````````````````````````````````` + +If a section title or other link contains non-Latin (e.g. Cyrillic) +characters, the LaTeX log contains lots of warnings like:: + + Package hyperref Warning: Glyph not defined in PD1 encoding, + (hyperref) removing `\CYRZ' on input line 6. + ... + +This can be solved with the "unicode" hyperref_option_ setting:: + + --hyperref-option=unicode + +(works also with non-unicode input/output encoding (e.g. "koi8r" or +"latin1"). Newer versions of hyperref default to "unicode=true". + +.. 
_hyperref_option: config.html#stylesheet-latex-writers + + +image inclusion +``````````````` + +Images__ are included in LaTeX with the help of the `graphicx` package. The +supported file formats depend on the used driver: + +* pdflatex_, lualatex, and xelatex_ work with PNG, JPG, or PDF, + but **not EPS**. +* Standard latex_ can include **only EPS** graphics, no other format. +* latex + dvipdfmx works with EPS and JPG (add 'dvipdfmx' to the + documentoptions_ or graphicx-option_ setting + and 'bmpsize' to the stylesheet_ setting). + +If PDF-image inclusion in PDF files fails, specifying +``--graphicx-option=pdftex`` might help. + +For details see grfguide.pdf_. + +The Rubber_ wrapper can be used for automatic image conversion. + +Docutils expects an URI as pointer to the image file. The latex writer +transforms this URI to a local path. By default, LaTeX does not accept +spaces and more than one dot in the filename. If using "traditional" +filenames is not an option, adding grffile_ to the `style sheets`_ +can help. + +__ ../ref/rst/directives.html#images +.. _grfguide.pdf: + https://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf +.. _grffile: https://ctan.org/pkg/grffile +.. _graphicx-option: config.html#graphicx-option + + +Why are my images too big? +`````````````````````````` + +HTML-browsers use the actual screen resolution (usually around +100 DPI). + +The CSS specification suggests: + + It is recommended that the reference pixel be the visual angle of one + pixel on a device with a pixel density of 96 DPI and a distance from the + reader of an arm's length. + + -- https://www.w3.org/TR/CSS2/syndata.html#length-units + +This is why pixmap images without size specification or objects with a size +specified in ``px`` tend to come too large in the PDF. + +Solution: + Specify the image size in fixed units (``pt``, ``cm``, ``in``) or + configure the `size of a pixel`_ (length unit px). 
+ + +Error ``illegal unit px`` +````````````````````````` + +If you convert the LaTeX source with a legacy program, you might get this +error. + +The unit "px" was introduced by the `pdfTeX` converter on 2005-02-04. +`pdfTeX` is used also for conversion into DVI format in all modern LaTeX +distributions (since ca. 2006). + +If updating LaTeX is not an option, just remove the "px" from the length +specification. HTML/CSS will default to "px" while the `latex2e` writer +will add the fallback unit "bp". + + +Error ``Symbol \textcurrency not provided`` ... +``````````````````````````````````````````````` + +The currency sign (\\u00a4) is not supported by all fonts (some have +a Euro sign in its place). You might see an error like:: + + ! Package textcomp Error: Symbol \textcurrency not provided by + (textcomp) font family ptm in TS1 encoding. + (textcomp) Default family used instead. + +(which in case of font family "ptm" is a false positive). Add either + +:warn: turn the error into a warning, use the default symbol (bitmap), or +:force,almostfull: use the symbol provided by the font at the user's + risk, + +to the document options or use a different font package. + + +Warning: language … not supported +````````````````````````````````` + +The "latex" writer uses the LaTeX package Babel_ and the "xetex" writer +uses Polyglossia_ for language_ support (hyphenation rules, auto-text +localisations and typographic rules). Polyglossia_ supports more +languages, so switching to the "xetex_" writer may help. + +For short quotes or if language support is provided by the user via other +`LaTeX document classes and packages`_, the warning can be ignored. + +.. _Babel: https://ctan.org/pkg/babel +.. _Polyglossia: https://ctan.org/pkg/polyglossia + + +Search and text extraction +`````````````````````````` + +Search for text that contains characters outside the ASCII range might +fail. See font_ and `font encoding`_ (as well as `Searching PDF files`_ +for background information). 
+ +It may help to load the `cmap` package (via `style sheets`_ or the custom +`LaTeX preamble`_); see also `Proper use of cmap and mmap`_. + +.. _Searching PDF files: + http://www.tex.ac.uk/cgi-bin/texfaq2html?label=srchpdf +.. _Proper use of cmap and mmap: + https://tex.stackexchange.com/questions/64409/proper-use-of-cmap-and-mmap + + +Unicode box drawing and block characters +```````````````````````````````````````` + +The easiest solution is to use xelatex_ for `PDF generation`_. + +With "traditional" TeX engines (e.g. pdflatex_): + +- Generate LaTeX code with `output-encoding`_ "utf-8". + +- Add the pmboxdraw_ package to the `style sheets`_. + (For shaded boxes also add the `color` package.) + +Unfortunately, this defines only a subset of the characters +(see pmboxdraw.pdf_ for a list). + +.. _pmboxdraw: https://ctan.org/pkg/pmboxdraw +.. _pmboxdraw.pdf: + https://mirrors.ctan.org/macros/latex/contrib/pmboxdraw/pmboxdraw.pdf + + +Bugs and open issues +-------------------- + +Open to be fixed or open to discussion. + +See also the entries in the `Docutils TODO list`_, +the BUGS_ documentation and the `SourceForge Bug Tracker`_. + +.. _Docutils TODO list: ../dev/todo.html#latex-writer +.. _bugs: ../../BUGS.html +.. _SourceForge Bug Tracker: https://sourceforge.net/p/docutils/bugs/ + + +Footnotes and citations +``````````````````````` + +Initially both were implemented using figure floats, because hyperlinking +back and forth seemed to be impossible. Later the `figure` directive was +added that puts images into figure floats. + +This results in footnotes, citations, and figures possibly being mixed at +page foot. + +Workaround: + Select citation handling with the use_latex_citations_ option. + +If ``use-latex-citations`` is used, a bibliography is inserted right at +the end of the document. *This should be customizable*. 
+ +If ``use-latex-citations`` is used adjacent citation references (separated +only by a single space or a newline) are combined to a single citation +group, i.e. ``[cite1]_ [cite2]_`` results in ``\cite{cite1,cite2}``. +The appearance in the output can be configured in a `style sheet`_. + +.. _use_latex_citations: config.html#use-latex-citations + + +Tables +`````` + +* Too wide tables (cf. `bug #422`_): + + Try the new_column_widths_ algorithm or use the `"widths" option`_ to + manually set the table column widths. + +* Table cells with both multirow and multicolumn are currently not possible. + +.. _bug #422: https://sourceforge.net/p/docutils/bugs/422/ + + +Figures +``````` + +* Figures are always as wide as the containing text. The "figwidth" argument + is currently not supported. As a consequence, the "align" argument has no + effect. + +* Wrapping text around figures is currently not supported. (Requires the + `wrapfig`_ package.) + +.. _wrapfig: https://ctan.org/pkg/wrapfig + + +Miscellaneous +````````````` + +* Pdfbookmark level 4 (and greater) does not work (might be settable but + complicated). + +* Hyperlinks are not hyphenated; this leads to bad spacing. See + docs/user/rst/demo.txt 2.14 directives. + +* Pagestyle headings does not work, when sections are starred. Use LaTeX for + the section numbering with the `options/settings`_ + ``--no-section-numbers`` (command line) or ``sectnum_xform: False`` + (config file). 
diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/links.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/links.txt new file mode 100644 index 00000000..3c52436d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/links.txt @@ -0,0 +1,454 @@ +===================== + Docutils_ Link List +===================== + +:Author: Lea Wiemann, the Docutils team +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. title:: Docutils Links + +.. contents:: + +This document contains links users of Docutils and reStructuredText +may find useful. Many of the projects +listed here are hosted in the `Docutils Sandbox`_. If you have +something to publish, you can get write access, too! + +The most current version of this link list can always be found at +https://docutils.sourceforge.io/docs/user/links.html. + +.. _Docutils: https://docutils.sourceforge.io/ +.. _Docutils Sandbox: https://docutils.sourceforge.io/sandbox/README.html + + +Editors +------- + +Advanced text editors with reStructuredText support, IDEs, and docutils GUIs: + +* Emacs `rst mode <https://docutils.sourceforge.io/tools/editors/emacs>`__. + +* `Vim <https://www.vim.org/index.php>`__: + + - `reStructuredText syntax highlighting mode + <https://www.vim.org/scripts/script.php?script_id=973>`__, + + - `VST <https://www.vim.org/scripts/script.php?script_id=1334>`__ (Vim + reStructuredText) plugin for Vim7 with folding. + + - `VOoM <https://www.vim.org/scripts/script.php?script_id=2657>`__ + plugin for Vim that emulates two-pane outliner with + support for reStructuredText (since version 4.0b2). + + - `Riv: Take notes in rst <https://github.com/Rykka/riv.vim>`__ Vim + plugin to take notes in reStructured text. 
+ +* `reStructuredText Language Support for Visual Studio Code`__ + + __ https://github.com/vscode-restructuredtext/vscode-restructuredtext + +* `reStructuredText editor plug-in for Eclipse`__ + + __ http://resteditor.sourceforge.net/ + +* `JED <https://www.jedsoft.org/jed/>`__ programmers editor with + `rst mode <https://jedmodes.sourceforge.io/mode/rst/>`__ + +* Gnome's gedit offers syntax highlighting and a reST preview pane. + + Latest version of the plugin is available from `bittner @ github`_ + (See also: `Gedit third party plugins`__). + + .. _bittner @ github: https://github.com/bittner/gedit-reST-plugin + __ https://wiki.gnome.org/Apps/Gedit/ThirdPartyPlugins-v3.8 + + +* Gunnar Schwant's DocFactory_ is a wxPython GUI application for + Docutils. + + .. _DocFactory: https://docutils.sourceforge.io/sandbox/gschwant/docfactory/doc/ + +* ReSTedit_ by Bill Bumgarner is a Docutils GUI for Mac OS X. + + .. _ReSTedit: https://svn.red-bean.com/restedit/trunk/README.html + +* `ReText <https://pypi.org/project/ReText/>`_ is a simple but powerful + editor for Markdown and reStructuredText markup languages. + It is written in Python using PyQt libraries. + +* Leo_ is an outliner_, written in Python using PyQt. It can be used as IDE + for literate programming, as a filing cabinet holding any kind of data and + as `document editor`__ with outlines containing reStructuredText markup. + + .. _Leo: https://leoeditor.com/ + .. _outliner: https://en.wikipedia.org/wiki/Outliner + __ https://leoeditor.com/tutorial-rst3.html + +* `NoTex <https://notex.ch>`_ is a browser-based reStructuredText editor + with syntax highlighting and PDF/HTML export functionality using Sphinx. + +* `rsted <https://github.com/anru/rsted>`_ is a "simple online editor for + reStructuredText on Flask". You can try it on http://rst.ninjs.org/ + + +Export +------ + +Projects providing additional export routes. 
+ +PDF +``` + +* `rst2pdf (reportlab)`__ is a tool to go directly from + reStructuredText to PDF, via ReportLab__. No LaTeX installation + is required. + + __ https://pypi.org/project/rst2pdf/ + __ https://pypi.org/project/reportlab/ + +* `rst2pdf (pdflatex)`__ by Martin Blais is a minimal front end + producing LaTeX, compiling the LaTeX file, getting the produced + output to the destination location and finally deleting all the + messy temporary files that this process generates. + + __ https://docutils.sourceforge.io/sandbox/blais/rst2pdf/ + +* `rst2pdf (rubber)`__ is a front end for the generation of PDF + documents from a reStructuredText source via LaTeX in one step + cleaning up intermediate files. It uses the rubber__ Python wrapper + for LaTeX and friends. + + __ https://docutils.sourceforge.io/sandbox/rst2pdf/README.html + __ https://launchpad.net/rubber + +* rlpdf_ is another PDF Writer based on ReportLabs. + + .. _rlpdf: https://docutils.sourceforge.io/sandbox/dreamcatcher/rlpdf/ + +* RinohType_ is a pure Python PDF Writer based on a document template and a + style sheet (beta). + + .. _RinohType: https://pypi.python.org/pypi/RinohType + +website generators and HTML variants +```````````````````````````````````` + +* The Sphinx_ Python Documentation Generator by Georg Brandl was + originally created to translate the `Python documentation`_. + In the meantime, there is a wide range of `Projects using Sphinx`__ + + It can generate complete web sites (interlinked and indexed HTML pages), + ePub, LaTeX, and others from a set of rST source files. + + .. _Sphinx: https://www.sphinx-doc.org + __ https://www.sphinx-doc.org/en/master/examples.html + +* The Nikola_ static site generator, uses reStructuredText by + default. + + .. _nikola: https://getnikola.com/ + +* Pelican_ is a static site generator (mainly for blogs). Articles/pages can + be written in reStructuredText or Markdown_ format. + + .. 
_pelican: https://docs.getpelican.com + +* tinkerer_ is a static blogging framework based on Sphinx_. + + .. _tinkerer: https://pypi.org/project/Tinkerer/ + +* htmlnav_ by Gunnar Schwant, is an HTML writer which supports navigation + bars. + + .. _htmlnav: https://docutils.sourceforge.io/sandbox/gschwant/htmlnav/ + +* rest2web, by Michael Foord, is a tool for creating web sites with + reStructuredText. Development stalled, there is a fork at + https://gitlab.com/wavexx/rest2web + +* `html4trans <https://docutils.sourceforge.io/sandbox/html4trans/>`__ + produces XHTML conforming to the version 1.0 Transitional DTD that + contains enough formatting information to be viewed by a lightweight HTML + browser without CSS support. + +* A `simple HTML writer`_ by Bill Bumgarner that doesn't rely on CSS + stylesheets. + + .. _simple HTML writer: https://docutils.sourceforge.io/sandbox/bbum/DocArticle/ + +ePub +```` + +* rst2epub2_ by Matt Harrison includes the epublib (originally by Tim + Tambin) and a rst2epub.py executable for the conversion. + + .. _rst2epub2: https://github.com/mattharrison/rst2epub2 + +* Sphinx_ provides ePub as output option, too. + + +Others +`````` + +* Pandoc_ is a document converter that can write Markdown_, + reStructuredText, HTML, LaTeX, RTF, DocBook XML, and S5. + + .. _Pandoc: https://pandoc.org/ + +* restxsl_ by Michael Alyn Miller, lets you transform reStructuredText + documents into XML/XHTML files using XSLT stylesheets. + + .. _restxsl: http://www.strangeGizmo.com/products/restxsl/ + +* An `XSLT script`__ by Ladislav Lhotka enables reStructuredText annotations + to be included in RELAX NG XML schemas. + + __ https://www.cesnet.cz/doc/techzpravy/2006/rngrest/ + +* `DocBook Writer`_ by Oliver Rutherfurd. + + .. _DocBook Writer: https://docutils.sourceforge.io/sandbox/oliverr/docbook/ + +* Nabu_, written by Martin Blais, is a publishing system which + extracts information from reStructuredText documents and stores it + in a database. 
Python knowledge is required to write extractor + functions and to retrieve the data from the database again. + + .. _Nabu: https://github.com/blais/nabu + +* The `pickle writer`_ by Martin Blais pickles the document tree to a binary + string. Later unpickling will allow you to publish with other Writers. + + .. _pickle writer: https://docutils.sourceforge.io/sandbox/blais/pickle_writer/ + +* The `Texinfo Writer`_, by Jon Waltman converts reStructuredText to + Texinfo, the documentation format used by the GNU project and the + Emacs text editor. Texinfo can be used to produce multiple output + formats, including HTML, PDF, and Info. + + .. _Texinfo Writer: https://docutils.sourceforge.io/sandbox/texinfo-writer/README.html + +* For `confluence CMS`_ see https://github.com/netresearch/rst2confluence. + + .. _confluence CMS: https://www.atlassian.com/software/confluence + +* Deploying into wikis might be aided by deploy-rst_. + + .. _deploy-rst: https://github.com/netresearch/deploy-rst + + +Import +------ + +Convert other formats to reStructuredText: + +* recommonmark_ is a Markdown_ (CommonMark_) parser for + docutils originally created by Luca Barbato. + + Docutils "markdown" parser (new in Docutils 0.17) is a wrapper + around recommonmark. + + .. _recommonmark: https://github.com/rtfd/recommonmark + .. _Markdown: https://daringfireball.net/projects/markdown/syntax + .. _CommonMark: https://commonmark.org/ + + +* sxw2rest_, by Trent W. Buck, converts StarOffice XML Writer (SXW) + files to reStructuredText. (link down) + + .. _sxw2rest: https://twb.ath.cx/~twb/darcs/sxw2rest/ + +* xml2rst_, an XSLT stylesheet written by Stefan Merten, converts XML + dumps of the document tree (e.g. created with rst2xml.py) back to + reStructuredText. + + .. _xml2rst: http://www.merten-home.de/FreeSoftware/xml2rst/index.html + +* xhtml2rest_, written by Antonios Christofides, is a simple utility + to convert XHTML to reStructuredText. + + .. 
_xhtml2rest: https://docutils.sourceforge.io/sandbox/wiemann/xhtml2rest/ + +* DashTable_ by Gustav Klopp converts HTML tables into reStructuredText. + Colspan and Rowspan supported! + + .. _DashTable: https://github.com/gustavklopp/DashTable + +* Sphinx_ includes a `LaTeX to Rst converter + <https://svn.python.org/projects/doctools/converter/>`__ in its source code + (trimmed to importing the old Python docs). + +* Pandoc_ can read Markdown_ and (subsets of) HTML, and LaTeX and + export to (amongst others) reStructuredText. + +* PySource_, by Tony Ibbs, is an experimental Python source Reader. + There is some related code in David Goodger's sandbox + (pysource_reader_) and a `Python Source Reader`_ document. + + .. _PySource: https://docutils.sourceforge.io/sandbox/tibs/pysource/ + .. _pysource_reader: https://docutils.sourceforge.io/sandbox/davidg/pysource_reader/ + .. _Python Source Reader: https://docutils.sourceforge.io/docs/dev/pysource.html + + +Extensions +---------- + +Extend the reStructuredText syntax or the features of Docutils. +More extensions are in the `Docutils Sandbox`_. + +* Beni Cherniavsky has written a generic `preprocessing module`_ for + roles and/or directives and built preprocessors for TeX math for + both LaTeX and HTML output on top of it. + + .. _preprocessing module: https://docutils.sourceforge.io/sandbox/cben/rolehack/ + +* Beni Cherniavsky maintains a Makefile_ for driving Docutils, hoping + to handle everything one might do with Docutils. + + .. _Makefile: https://docutils.sourceforge.io/sandbox/cben/make/ + +* The `ASCII art to SVG converter`_ (aafigure) developed by + Chris Liechti can parse ASCII art images, embedded in reST documents and + output an image. This would mean that simple illustrations could be + embedded as ASCII art in the reST source and still look nice when + converted to e.g. HTML + + .. 
_ASCII art to SVG converter: + https://docutils.sourceforge.io/sandbox/cliechti/aafigure/ + +* Quick and easy publishing reStructuredText source files as blog posts + on blogger.com is possible with `rst2blogger`_ . + + .. _rst2blogger: https://github.com/dhellmann/rst2blogger#readme + + +Related Applications +-------------------- + +Applications using docutils/reStructuredText and helper applications. + +* For Wikis, please see the `FAQ entry about Wikis`_. + +* For Blogs (Weblogs), please see the `FAQ entry about Blogs`_. + +* `Project Gutenberg`_ uses Docutils for its "ebookmaker_" + xetex, nroff, and epub generator (with some `extensions to rST`__). + + __ http://pgrst.pglaf.org/publish/181/181-h.html + + +* Text-Restructured_ at CPAN is a set of modules to parse + reStructuredText documents and output them in various formats written + in Perl_. + Up to January 2021, the sources were stored in the Docutils repository_. + After long inactivity (the last commit was r6498__ + 2010-12-08), ``trunk/prest/`` was moved to the attic. + + __ https://sourceforge.net/p/docutils/code/6498/ + +.. _FAQ entry about Wikis: http://docutils.sf.net/FAQ.html + #are-there-any-wikis-that-use-restructuredtext-syntax +.. _FAQ entry about Blogs: https://docutils.sourceforge.io/FAQ.html + #are-there-any-weblog-blog-projects-that-use-restructuredtext-syntax +.. _Project Gutenberg: http://www.gutenberg.org +.. _ebookmaker: https://pypi.org/project/ebookmaker/ +.. _Perl: https://www.perl.org +.. _Text-Restructured: https://metacpan.org/dist/Text-Restructured +.. _repository: ../dev/repository.html + +Tools +````` + +* rstcheck_ Checks syntax of reStructuredText and code blocks nested within + it. (Using the Sphinx syntax "code-block" for the "code" directive.) + + .. _rstcheck: https://pypi.python.org/pypi/rstcheck + +* restview_ is a viewer for ReStructuredText documents. 
+ + Pass the name of a ReStructuredText document to restview, and it will + launch a web server on localhost:random-port and open a web browser. It + will also watch for changes in that file and automatically reload and + rerender it. This is very convenient for previewing a document while + you're editing it. + + .. _restview: https://mg.pov.lt/restview/ + + +Development +``````````` + +* Sphinx_ extends the ReStructuredText syntax to better support the + documentation of Software (and other) *projects* (but other documents + can be written with it too). + + The `Python documentation`_ is based on reStructuredText and Sphinx. + + .. _Python documentation: https://docs.python.org/ + +* Trac_, a project management and bug/issue tracking system, supports + `using reStructuredText + <https://trac.edgewall.org/wiki/WikiRestructuredText>`__ as an + alternative to wiki markup. + + .. _Trac: https://trac.edgewall.org/ + +* PyLit_ provides a bidirectional text <--> code converter for *literate + programming with reStructuredText*. + + .. _PyLit: https://repo.or.cz/pylit.git + + +CMS Systems +``````````` + +* Plone_ and Zope_ both support reStructuredText markup. + +* ZReST_, by Richard Jones, is a "ReStructuredText Document for Zope_" + application that is complete and ready to install. + +.. _Plone: https://plone.org/ +.. _Zope: https://www.zope.dev/ +.. _ZReST: https://docutils.sourceforge.io/sandbox/richard/ZReST/ + + +Presentations +````````````` + +* rst2html5_ transform restructuredtext documents to html5 + twitter's + bootstrap css, deck.js or reveal.js + + .. _rst2html5: https://github.com/marianoguerra/rst2html5 + +* landslide_ generates HTML5 slideshows from markdown, ReST, or textile. + + .. _landslide: https://github.com/adamzap/landslide + +* `native support for S5 <slide-shows.s5.html>`_. + +* The `PythonPoint interface`_ by Richard Jones produces PDF + presentations using ReportLabs' PythonPoint. + + .. 
_PythonPoint interface: + https://docutils.sourceforge.io/sandbox/richard/pythonpoint/ + +* rst2beamer_ generates a LaTeX source that uses the `Beamer` document class. + Can be converted to PDF slides with pdfLaTeX/XeLaTeX/LuaLaTeX. + + .. _rst2beamer: https://docutils.sourceforge.io/sandbox/rst2beamer/ + +* InkSlide_ quick and easy presentations using Inkscape_. InkSlide uses + reStructuredText for markup, although it renders only a subset of rst. + + .. _InkSlide: http://wiki.inkscape.org/wiki/index.php/InkSlide + .. _Inkscape: http://inkscape.org/ + +* rst2outline_ translates a reStructuredText document to a plain text + outline. This can then be transformed to PowerPoint. + + .. _rst2outline: https://docutils.sourceforge.io/sandbox/rst2outline/ + +* Pandoc_ can also be used to produce slides diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/mailing-lists.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/mailing-lists.txt new file mode 100644 index 00000000..20a8bd63 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/mailing-lists.txt @@ -0,0 +1,150 @@ +========================= + Docutils_ Mailing Lists +========================= + +:Author: Lea Wiemann +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + + +.. Gmane went down 2016 and is only partially restored. + http://home.gmane.org/2016/08/29/next-steps-gmane/ + +.. 
raw:: html + + <div class="sidebar"> + <p class="first sidebar-title">Search the list archives</p> + <form method="get" action="https://sourceforge.net/p/docutils/mailman/search/"> + <p style="margin-bottom: 3px;"><input type="text" name="q" style="width: 100%;" /></p> + <p>Search in <select name="mail_list"> + <option value="all">all mailing lists</option> + <option value="docutils-users">Docutils-users</option> + <option value="docutils-develop">Docutils-develop</option> + <option value="docutils-checkins">Docutils-checkins</option> + </select></p> + <p class="last"><input type="submit" value="Search" /></p> + </form> + </div> + +All discussion about Docutils takes place on mailing lists. + +There are four different lists with traffic related to Docutils. +For an overview, see https://sourceforge.net/p/docutils/mailman/. +If unsure, use the **Docutils-users** mailing list: + + +Docutils-users +-------------- + +The `Docutils-users mailing list`_ is a place to discuss any issues +related to the usage of Docutils and reStructuredText. (Please be +sure to check the FAQ_ first.) + +There are several possibilities to **read and post** messages on the +mailing lists; use the one you feel most comfortable with. + +* Using an `email subscription`__. This is the traditional way; you + will receive all messages sent to the mailing list via email. + + __ `docutils-users mailing list`_ + +* Use a newsreader with Gmane's `NNTP interface`__ + (gmane.text.docutils.user on news.gmane.org). + + __ nntp://news.gmane.org/gmane.text.docutils.user + +* **If you do not wish to subscribe,** you can also just send an email + message with your question or comment to + Docutils-users@lists.sourceforge.net. + + Note in your message that you are not subscribed (to make sure that you + receive copies [CCs] of any replies) or check for answers in the + `Docutils-users Archives`_.
+ +The first time you post a message without being subscribed +you will receive an automatic response with the subject +"Your message to Docutils-users awaits moderator approval"; this is done to +prevent spam to the mailing lists. Your message will usually be approved +within a few hours. To avoid duplicates, please do not resend your message +using a different email address. After your first message has been +approved, your email address will be added to the whitelist and future +messages will be posted to the mailing list without moderation. + +To see the collection of prior postings to the list, visit the +`Docutils-users Archives`_. + + +Docutils-develop +---------------- + +Discussions about developing and extending Docutils take place on the +`Docutils-develop mailing list`_. + +You can access this list via `email subscription`__ or news__ +(gmane.text.docutils.devel); the posting address is +Docutils-develop@lists.sourceforge.net. + +To see the collection of prior postings to the list, visit the +`Docutils-develop Archives`__. + +__ `Docutils-develop mailing list`_ +__ nntp://news.gmane.org/gmane.text.docutils.devel +__ https://sourceforge.net/mailarchive/forum.php?forum_name=docutils-develop + +Docutils-checkins +----------------- + +All check-ins to the `Subversion repository`_ cause a "check-in email" +to the `Docutils-checkins list`_. In order to stay informed about +current development, developers are advised to monitor this mailing +list. + +This mailing list is for reading only; please direct any discussion +about the check-ins to Docutils-develop. (For your convenience, the +Reply-To header of all check-in emails points to Docutils-develop.) + +This mailing list is accessible via `email subscription`__ or +news__ (gmane.text.docutils.cvs) as well. + +If you are using an email subscription and you would prefer to only +receive check-in messages for changes that affect the main Docutils +distribution (i.e. 
``trunk/docutils/*``), go to the `list options`_ +page and select the "Docutils core" topic. + +__ `Docutils-checkins list`_ +__ nntp://news.gmane.org/gmane.text.docutils.cvs +.. _list options: https://lists.sourceforge.net/lists/options/docutils-checkins + +Doc-SIG +------- + +The "Python Documentation Special Interest Group" (`Doc-SIG`_) mailing list +is occasionally used to discuss the usage of Docutils for Python +documentation. + +This mailing list can be accessed via `email subscription`__ or +news__ (gmane.comp.python.documentation) as well. You must be +subscribed in order to post messages to this mailing list. + +__ `Doc-SIG`_ +__ nntp://news.gmane.org/gmane.comp.python.documentation + + +.. _Docutils-users mailing list: + https://lists.sourceforge.net/lists/listinfo/docutils-users +.. _Docutils-users Archives: + https://sourceforge.net/mailarchive/forum.php?forum_name=docutils-users +.. _Docutils-develop mailing list: + https://lists.sourceforge.net/lists/listinfo/docutils-develop +.. _Docutils-develop Archives: + https://sourceforge.net/mailarchive/forum.php?forum_name=docutils-develop +.. _Docutils-checkins list: + https://lists.sourceforge.net/lists/listinfo/docutils-checkins +.. _Doc-SIG: + https://mail.python.org/mailman/listinfo/doc-sig + +.. _Subversion repository: ../dev/repository.html +.. _Docutils: https://docutils.sourceforge.io/ +.. _FAQ: ../../FAQ.html diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/manpage.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/manpage.txt new file mode 100644 index 00000000..75ae3e14 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/manpage.txt @@ -0,0 +1,168 @@ +============================== + manpage writer for Docutils_ +============================== + +:Author: Engelbert Gruber +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. 
+ +This writer explores the possibilities to generate man-pages from +reStructuredText. Man pages are the way for Unix systems to provide +help to the user. GNU does this with (TeX)info-pages. + +.. contents:: + + +Module information +'''''''''''''''''' + +Unix man pages belong to a numbered section, 1 is user commands, 8 contains +administrator commands, the headlines of all manpages are collected into a +database, queryable with the program ``apropos``, therefore the headline +should contain a short text describing into which group this command belongs. + +That information is collected from the title, subtitle and docinfo. + +Also man pages have a defined set of sections, that are more or less +mandatory, see References_. + +man pages look like:: + + man(1) Man Pager Utils man(1) + + NAME + man - an interface to the on-line reference manuals + + SYNOPSIS + man [-c|-w|-tZT device] [-adhu7V] [-m system[,...]] [-L locale] + +in roff formatting:: + + .TH man 1 "14 May 2001" "2.3.19" "Manual pager utils" + .SH NAME + man \- an interface to the on-line reference manuals + .SH SYNOPSIS + .\" The general command line + .B man + .RB [\| \-c \||\| \-w \||\| \-tZT + .IR device \|] + +This means we have + +* a title "man" +* a subtitle "an interface to the on-line reference manuals" +* a manual section "1" +* a manual group "Manual pager utils" +* a date "14 May 2001" +* a version "2.3.19" + +References +'''''''''' + +man pages from section 7, ``man`` and ``man-pages``. + +.. [LMHT] `Linux Man Page Howto <https://tldp.org/HOWTO/Man-Page/>`__. + +Conventions +''''''''''' + +* man pages have a special structure and organization. From the manpage + to *man*:: + + The table below shows the section numbers of the manual followed by the + types of pages they contain.
+ + 1 Executable programs or shell commands + 2 System calls (functions provided by the kernel) + 3 Library calls (functions within program libraries) + 4 Special files (usually found in /dev) + 5 File formats and conventions eg /etc/passwd + 6 Games + 7 Miscellaneous (including macro packages and conven- + tions), e.g. man(7), groff(7) + 8 System administration commands (usually only for root) + 9 Kernel routines [Non standard] + + A manual page consists of several parts. + + They may be labelled NAME, SYNOPSIS, DESCRIPTION, OPTIONS, FILES, + SEE ALSO, BUGS, and AUTHOR. + + The following conventions apply to the SYNOPSIS section and can be used + as a guide in other sections. + + bold text type exactly as shown. + italic text replace with appropriate argument. + [-abc] any or all arguments within [ ] are optional. + -a|-b options delimited by | cannot be used together. + argument ... argument is repeatable. + [expression] ... entire expression within [ ] is repeatable. + + The command or function illustration is a pattern that should match all + possible invocations. In some cases it is advisable to illustrate sev- + eral exclusive invocations as is shown in the SYNOPSIS section of this + manual page. + +* new lines in general. + + Consecutive blank lines are merged by the viewer but not on printouts. + So one has to be cautious. This is most disturbing when printing + postscript. + + .. NOTE:: + + 1. Roff requests only work when at line start. + 2. But consecutive blank lines are merged by the viewer but not on + printouts. + + So try the rule start new lines in ``visit_``-functions, but only if + necessary. E.g. ``field-names`` are already on a new line because of + docutils structure. + +* Indentation, left margin: + + - The writer includes two macros ``.INDENT`` and ``.UNINDENT`` that + keep track of the indentation in roff-code, for line-blocks python + keeps track of it. WHAT should be the preferred way ? + + But standard macros like ``.PP`` might reset it. 
+ + - Why do ``.RE`` and ``.RS`` not work? + + .. Note:: + Current indent is in register ``.i``. + +* [LMHT]_ Filenames are always in italics, except in the SYNOPSIS section, + use:: + + .I /usr/include/stdio.h + + and:: + + .B #include <stdio.h> + +* Tables are possible, via the external processor tbl, although one should + avoid them. + +TODO - Open issues +'''''''''''''''''' + +* How to escape double quotes in macro arguments ? +* Typeset URLs : ``man 7 man`` on linux says use ``.UR`` and ``.UE``. +* How to typeset command/manpage names in text. +* How to write long syntax lines. +* Line ends around email or web addresses in texts. + How to distinguish something is inline or not ? + +* Images and equations are discouraged. +* Lists in admonitions are not intended. +* Encoding declaration ``'\" t -*- coding: ISO-8859-1 -*-`` + in first line. + + BUT if UTF-8 is declared tables are no longer processed. + +* Input and output encoding are problematic at least. + +.. _Docutils: https://docutils.sourceforge.io/ diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/odt.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/odt.txt new file mode 100644 index 00000000..de080659 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/odt.txt @@ -0,0 +1,1200 @@ +======================= +Odt Writer for Docutils +======================= + +:Author: Dave Kuhlman +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +:abstract: This document describes the Docutils odtwriter + (rst2odt.py). + +.. sectnum:: + +.. contents:: + + +Introduction +============ + +What it does -- ``rst2odt.py`` translates reST +(reStructuredText) into an Open Document Format ``.odt`` file.
You +can learn more about the ODF format here: + +- `OASIS Open Document Format for Office Applications + (OpenDocument) TC`_ + +- `Open Document at Wikipedia`_ + +You should be able to open documents (.odt files) generated with +``rst2odt.py`` in ``OpenOffice/oowriter``. + +You can learn more about Docutils and reST here: `Docutils`_ + + +Requirements +============ + +In addition to the Docutils standard requirements, ``odtwriter`` +requires: + +- Optional -- `Pygments`_ is required if you want syntax + highlighting of code in literal blocks. See section `Syntax + highlighting`_. + +- Optional -- `Python Imaging Library`_ (PIL) is required if on an + image or figure directive, you specify ``scale`` but not ``width`` + and ``height``. See section `Images and figures`_. + + + +How to Use It +============= + +Run it from the command line as follows:: + + $ rst2odt.py myinput.txt myoutput.odt + +To see usage information and to learn about command line options +that you can use, run the following:: + + $ rst2odt.py --help + +Examples:: + + $ rst2odt.py -s -g python_comments.txt python_comments.odt + + $ rst2odt.py --source-url=odtwriter.txt --generator \ + --stylesheet=/myconfigs/styles.odt odtwriter.txt odtwriter.odt + + +Configuration file +------------------ + +The options described below can also be set in a configuration file. +Use section ``[odf_odt writer]`` to set options specific to the +``odtwriter``. For example:: + + [odf_odt writer] + stylesheet: styles1.odt + +See the "Docutils Configuration" document for more information on +Docutils configuration files, including locations which are +searched. + + +Command line options +-------------------- + +The following command line options are specific to ``odtwriter``: + +--stylesheet=<URL> Specify a stylesheet URL, used verbatim. + Default: writers/odf_odt/styles.odt in the + installation directory. 
+--odf-config-file=<file> + Specify a configuration/mapping file relative to the + current working directory for additional ODF options. + In particular, this file may contain a section named + "Formats" that maps default style names to names to be + used in the resulting output file allowing for + adhering to external standards. For more info and the + format of the configuration/mapping file, see the + odtwriter doc. +--cloak-email-addresses + Obfuscate email addresses to confuse harvesters while + still keeping email links usable with standards- + compliant browsers. +--no-cloak-email-addresses + Do not obfuscate email addresses. +--table-border-thickness=TABLE_BORDER_THICKNESS + Specify the thickness of table borders in thousands of + a cm. Default is 35. +--add-syntax-highlighting + Add syntax highlighting in literal code blocks. +--no-syntax-highlighting + Do not add syntax highlighting in literal code blocks. + (default) +--create-sections Create sections for headers. (default) +--no-sections Do not create sections for headers. +--create-links Create links. +--no-links Do not create links. (default) +--endnotes-end-doc Generate endnotes at end of document, not footnotes at + bottom of page. +--no-endnotes-end-doc Generate footnotes at bottom of page, not endnotes at + end of document. (default) +--generate-list-toc Generate a bullet list table of contents, not an + ODF/``oowriter`` table of contents. +--generate-oowriter-toc + Generate an ODF/``oowriter`` table of contents, + not a bullet list. (default) **Note:** + ``odtwriter`` is not able to determine page + numbers, so you will need to open the + generated document in ``oowriter``, then + right-click on the table of contents and + select "Update" to insert page numbers. +--custom-odt-header=CUSTOM_HEADER + Specify the contents of an custom header line. See + odf_odt writer documentation for details about special + field character sequences. 
See section + `Custom header/footers: inserting page numbers, date, time, etc`_ + for details +--custom-odt-footer=CUSTOM_FOOTER + Specify the contents of a custom footer line. See + odf_odt writer documentation for details about special + field character sequences. See section + `Custom header/footers: inserting page numbers, date, time, etc`_ + for details + + + +Styles and Classes +================== + +``odtwriter`` uses a number of styles that are defined in +``styles.xml`` in the default ``styles.odt``. This section +describes those styles. + +Note that with the ``--stylesheet`` command line option, you can +use either ``styles.odt`` or ``styles.xml``, as described below. +Use of ``styles.odt`` is recommended over ``styles.xml``. + +You can modify the look of documents generated by ``odtwriter`` in +several ways: + +- Open (a copy of) ``styles.odt`` in ``OpenOffice/oowriter`` and + modify the style you wish to change. Now, save this document, + then generate your documents using this modified copy of + ``styles.odt``. + + In my version of ``oowriter``, to modify styles, either (1) + press F11 or (2) use menu item "Format/Styles and Formatting", + then right-click on the relevant style and select "Modify". + Modify the style, then save your document. + +- Open a document generated by ``odtwriter`` in ``oowriter``. Now, + edit the style you are interested in modifying. Now, you + can extract the styles.xml file from your document and either + (1) use this as your default styles file or (2) copy and paste + the relevant style definition into your styles.xml. + +- Extract ``styles.xml`` from ``styles.odt`` using your favorite + ``zip/unzip`` tool. Then modify ``styles.xml`` with a text + editor. Now re-zip it back into your own ``styles.odt``, or use + it directly by specifying it with the ``--stylesheet`` command + line option.
**Hint:** If you intend to extract ``styles.xml`` + from an ``.odt`` file (and then "re-zip" it), you should turn off + XML optimization/compression in ``oowriter``. In order to do this + in ``oowriter``, use Tools --> Options... --> Load-Save --> + General and turn off "Size optimization for XML format". + +- Open an empty (or new) document in ``oowriter``. Define all of + the styles described in this section. Then, use that document (a + .odt file) as your stylesheet. ``odtwriter`` will extract the + ``styles.xml`` file from that document and insert it into the + output document. + +- Some combination of the above. + + +Styles used by odtwriter +------------------------ + +This section describes the styles used by ``odtwriter``. + +Note that we do not describe the "look" of these styles. That can +be easily changed by using ``oowriter`` to edit the document +``styles.odt`` (or a copy of it), and modifying any of the styles +described here. + +To change the definition and appearance of these styles, open +``styles.odt`` in ``oowriter`` and open the Styles and Formatting +window by using the following menu item:: + + Format --> Styles and Formatting + +Then, click on the Paragraph Styles button or the Character Styles +button at the top of the Styles and Formatting window. You may +also need to select "All Styles" from the drop-down selection list +at the bottom of the Styles and Formatting window in order to see +the styles used by ``odtwriter``. + +Notice that you can make a copy of file ``styles.odt``, modify it +using ``oowriter``, and then use your copy with the +``--stylesheet=<file>`` command line option. Example:: + + $ rst2odt.py --stylesheet=mystyles.odt test2.txt test2.odt + + +Paragraph styles +~~~~~~~~~~~~~~~~ + +rststyle-attribution + The style for attributions, for example, the attribution in a + ``.. epigraph::`` directive. Derived from + ``rststyle-blockquote``. + +rststyle-blockindent + An indented block. + +rststyle-blockquote + A block quote.
+ +rststyle-blockquote-bulletitem + The style for bullet list items inside block quote. + +rststyle-blockquote-enumitem + The style for enumerated list items inside block quote. + +rststyle-bodyindent + An indented block. + +rststyle-bulletitem + An item in a bullet list. + +rststyle-caption + The caption in a figure or image. Also see + ``rststyle-legend``. + +rststyle-codeblock + Literal code blocks -- A block of example code. Created with + double colon ("::") followed by an indented block or with the + ``.. parsed-literal::`` directive. Derived from the + ``Preformatted Text`` style in ``oowriter``. + +rststyle-enumitem + An item in an enumerated list. + +rststyle-epigraph + The style for epigraphs, for example, the body of an + ``.. epigraph::`` directive. Derived from + ``rststyle-blockquote``. + +rststyle-epigraph-bulletitem + The style for bullet list items inside epigraphs. + +rststyle-epigraph-enumitem + The style for enumerated list items inside epigraphs. + +rststyle-footer + The style for footers. The footer content originates from the + ``.. footer::`` directive and in response to the command line + flags for generator (``--generator``), date/time generated + (``--date`` and ``--time``), and view source link + (``--source-link`` and ``--source-url=URL``). + +rststyle-header + The style for headers. The header content originates from the + ``.. header::`` directive. + +rststyle-highlights + The style for highlights, for example, the body of an + ``.. highlights::`` directive. Derived from + ``rststyle-blockquote``. + +rststyle-highlights-bulletitem + The style for bullet list items inside highlights. + +rststyle-highlights-enumitem + The style for enumerated list items inside highlights. + +rststyle-horizontalline + A horizontal line, e.g. used for transitions. + +rststyle-legend + The legend in a figure. See the Docutils figure directive. Also + see ``rststyle-caption``. + +rststyle-table-title + The style for titles of tables.
See section `The table + directive`_. + +rststyle-textbody + Normal text. The style for paragraphs. Derived from the ``Text + body`` style in ``oowriter``. + + +Character styles +~~~~~~~~~~~~~~~~ + +rststyle-emphasis + Emphasis. Normally rendered as italics. + +rststyle-inlineliteral + An inline literal. + +rststyle-strong + Strong emphasis. Normally rendered as boldface. + +rststyle-quotation + In-line quoted material. + +rststyle-codeblock-classname + Syntax highlighting in literal code blocks -- class names. + +rststyle-codeblock-comment + Syntax highlighting in literal code blocks -- comments. + +rststyle-codeblock-functionname + Syntax highlighting in literal code blocks -- function names. + +rststyle-codeblock-keyword + Syntax highlighting in literal code blocks -- Python language + keywords. + +rststyle-codeblock-name + Syntax highlighting in literal code blocks -- other names, for + example, variables. + +rststyle-codeblock-number + Syntax highlighting in literal code blocks -- literal numbers, + including integers, floats, hex numbers, and octal numbers. + +rststyle-codeblock-operator + Syntax highlighting in literal code blocks -- Python operators. + +rststyle-codeblock-string + Syntax highlighting in literal code blocks -- literal strings. + + +List styles +~~~~~~~~~~~ + +rststyle-bulletlist + Bullet lists (but not in the table of contents) + +rststyle-blockquote-bulletlist + Bullet lists in block quotes. + +rststyle-blockquote-enumlist + Enumerated lists in block quotes. 
+ +rststyle-enumlist-arabic + Enumerated lists, arabic (but not in the table of contents) + +rststyle-enumlist-loweralpha + Enumerated lists, lower alpha (but not in the table of contents) + +rststyle-enumlist-lowerroman + Enumerated lists, lower roman (but not in the table of contents) + +rststyle-enumlist-upperalpha + Enumerated lists, upper alpha (but not in the table of contents) + +rststyle-enumlist-upperroman + Enumerated lists, upper roman (but not in the table of contents) + +rststyle-epigraph-bulletlist + Bullet lists in epigraphs. See the ``.. epigraph::`` + directive. + +rststyle-epigraph-enumlist + Enumerated lists in epigraphs. See the ``.. epigraph::`` + directive. + +rststyle-highlights-bulletlist + Bullet lists in highlights blocks. See the ``.. highlights::`` + directive. + +rststyle-highlights-enumlist + Enumerated lists in highlights blocks. See the ``.. highlights::`` + directive. + +rststyle-tocbulletlist + Lists in the table of contents when section numbering is off. + +rststyle-tocenumlist + Lists in the table of contents when section numbering is on. + + +Admonition styles +~~~~~~~~~~~~~~~~~ + +rststyle-admon-attention-hdr + The style for the attention admonition header/title. + +rststyle-admon-attention-body + The style for the attention admonition body/paragraph. + +rststyle-admon-caution-hdr + The style for the caution admonition header/title. + +rststyle-admon-caution-body + The style for the caution admonition body/paragraph. + +rststyle-admon-danger-hdr + The style for the danger admonition header/title. + +rststyle-admon-danger-body + The style for the danger admonition body/paragraph. + +rststyle-admon-error-hdr + The style for the error admonition header/title. + +rststyle-admon-error-body + The style for the error admonition body/paragraph. + +rststyle-admon-hint-hdr + The style for the hint admonition header/title. + +rststyle-admon-hint-body + The style for the hint admonition body/paragraph.
+ +rststyle-admon-hint-hdr + The style for the hint admonition header/title. + +rststyle-admon-hint-body + The style for the hint admonition body/paragraph. + +rststyle-admon-important-hdr + The style for the important admonition header/title. + +rststyle-admon-important-body + The style for the important admonition body/paragraph. + +rststyle-admon-note-hdr + The style for the note admonition header/title. + +rststyle-admon-note-body + The style for the note admonition body/paragraph. + +rststyle-admon-tip-body + The style for the tip admonition body/paragraph. + +rststyle-admon-tip-hdr + The style for the tip admonition header/title. + +rststyle-admon-warning-body + The style for the warning admonition body/paragraph. + +rststyle-admon-warning-hdr + The style for the warning admonition header/title. + +rststyle-admon-generic-body + The style for the generic admonition body/paragraph. + +rststyle-admon-generic-hdr + The style for the generic admonition header/title. + + +Rubric style +~~~~~~~~~~~~ + +rststyle-rubric + The style for the text in a rubric directive. + +The rubric directive recognizes a "class" option. If entered, +odtwriter uses the value of that option instead of the +``rststyle-rubric`` style. Here is an example which attaches +the ``rststyle-heading1`` style to the generated rubric:: + + .. rubric:: This is my first rubric + :class: rststyle-heading1 + + +Table styles +~~~~~~~~~~~~ + +A table style is generated by ``oowriter`` for each table that you +create. Therefore, ``odtwriter`` attempts to do something similar. +These styles are created in the ``content.xml`` document in the +generated ``.odt`` file. These styles have names prefixed with +"rststyle-table-". + +There are two ways in which you can control the styles of your +tables: one simple, the other a bit more complex, but more +powerful.
+ +First, you can change the thickness of the borders of all tables +generated in a document using the "--table-border-thickness" +command line option. + +Second, you can control additional table properties and you can +apply different styles to different tables within the same document +by customizing and using tables in your stylesheet: ``styles.odt`` +or whatever you name your copy of it using the --stylesheet command +line option. Then, follow these rules to apply a table style to +the tables in your document: + +- The default table style -- Optionally, alter and customize the + style applied by default to tables in your document by modifying + table "rststyle-table-0" in your stylesheet (``styles.odt`` or a + copy). Caution: Do not change the name of this table. + +- User-created table styles -- Add one or more new table styles to + be applied selectively to tables in your document by doing the + following: + + 1. Using ``oowriter``, add a table to your stylesheet and give it + a name that starts with the prefix "rststyle-table-", for + example "rststyle-table-vegetabledata". Customize the table's + border thickness, border color, and table background color. + + 2. In your reStructuredText document, apply your new table style + to a specific table by placing the ".. class::" directive + immediately before the table, for example:: + + .. class:: rststyle-table-vegetabledata + +The default table style will be applied to all tables for which you +do not specify a style with the ".. class::" directive. + +Customize the table properties in ``oowriter`` using the table +properties dialog for the table (style) that you wish to customize. + +Note that "--table-border-thickness" command line option overrides +the border thickness specified in the stylesheet. + +The specific properties that you can control with this second +method are the following: + +- Border thickness and border color. 
+ +- Background color -- When you change the background color of a + table to be used as a style (in ``styles.odt`` or whatever you + name it), make sure you change the background color for the + *table* and *not* for a cell in the table. ``odtwriter`` picks + the background color from the table, not from a cell within the + table. + + +Line block styles +~~~~~~~~~~~~~~~~~~ + +The line block styles wrap the various nested levels of line +blocks. There is one line block style for each indent level. + +rststyle-lineblock1 + Line block style for line block with no indent. + +rststyle-lineblock2 + Line block style for line block indented 1 level. + +rststyle-lineblock3 + Line block style for line block indented 2 levels. + +rststyle-lineblock4 + Line block style for line block indented 3 levels. + +rststyle-lineblock5 + Line block style for line block indented 4 levels. + +rststyle-lineblock6 + Line block style for line block indented 5 levels. + +Notes: + +- ``odtwriter`` does not check for a maximum level of indents + within line blocks. Therefore, you can define additional line + block styles for additional levels if you need them. Define + these styles with the names ``rststyle-lineblock7``, + ``rststyle-lineblock8``, ... + +- Since the line block style is used to create indentation, a line + block that is inside a block quote will use + ``rststyle-lineblock2`` as its first level of indentation. + + +Footnote and citation styles +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +rststyle-footnote + The style for footnotes. This style affects the footnote + content, *not* the footnote reference in the body of the document. + +rststyle-citation + The style for citations. This style affects the citation + content, *not* the citation reference in the body of the document. + You might need to adjust the indentation in this style + depending on the length of the label used in your citations. 
+ + +Heading and title styles +~~~~~~~~~~~~~~~~~~~~~~~~~ + +rststyle-heading{1|2|3|4|5} + The styles for headings (section titles and sub-titles). Five + levels of sub-headings are provided: rststyle-heading1 through + rststyle-heading5. + +rststyle-title + The style for the document title. + +rststyle-subtitle + The style for the document sub-title. + + +Image and figure styles +~~~~~~~~~~~~~~~~~~~~~~~~~ + +rststyle-image + The style applied to an image, either an image by itself or an + image in a figure. + +rststyle-figureframe + The style applied to a figure (actually to the frame that + surrounds a figure). + + + +Defining and using a custom stylesheet +--------------------------------------- + +You can create your own custom stylesheet. Here is how: + +1. Make a copy of ``styles.odt``, which is in the distribution. + +2. Open your copy of ``styles.odt`` in ``oowriter``. Modify styles + in that document. Then, save it. + +3. When you run ``rst2odt.py``, use the ``--stylesheet`` command + line option to use your custom stylesheet. Run ``rst2odt.py + --help`` to learn more about these options. + + +Why custom stylesheets +~~~~~~~~~~~~~~~~~~~~~~~ + +Here are a few reasons and ideas: + +- The page size is stored in the style sheet. The default page + size is ``Letter``. You can change the page size (for example, + to ``A4``) in your custom stylesheet by opening it in + ``oowriter``, then clicking on menu: ``Format/Page...``, then + clicking on the ``Page`` tab. + + + +Defining and using custom style names +------------------------------------- + +[Credits: Stefan Merten designed and implemented the custom style names +capability. Thank you, Stefan.] + +You can also instruct ``odtwriter`` to use style names of your own +choice. + + +Why custom style names +~~~~~~~~~~~~~~~~~~~~~~ + +Here are a few reasons and ideas: + +- Suppose that your organization has a standard set of styles in + OOo ``oowriter`` and suppose that the use of these styles is + required. 
You would like to generate ODF documents from + reST text files, and you want the generated documents to contain + these styles. + +- Suppose that your company or organization has a policy of using a + certain MS Word template for some set of documents. You would + like to generate ODF documents that use these custom style names, + so that you can export these documents from ODF ``oowriter`` to MS + Word documents that use these style names. + +- Suppose that your documents are written in a language other than + English. You would like the style names visible in the "Styles + and Formatting" window in OOo ``oowriter`` (menu item + ``Format/Styles and Formatting``) to be understandable in the + language of your users. + +- ``odtwriter`` maps single asterisks/stars (for example, \*stuff\*) + to emphasis and double stars to strong. You'd like to reverse + these. Or, you would like to generate headings level 3 and 4 + where headings level 1 and 2 would normally be produced. + + +How to use custom style names +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to define custom style names and to generate documents that +contain them, do the following: + + +1. Create a configuration file containing a "Formats" section. The + configuration file obeys the file format supported by the Python + ConfigParser module: + `ConfigParser -- Configuration file parser -- + https://docs.python.org/3/library/configparser.html + <https://docs.python.org/3/library/configparser.html>`_. + +2. In the "Formats" section of the configuration file, create one + option (a name-value pair) for each custom style name that you + wish to define. The option name is the standard ``odtwriter`` + style name (without "rststyle-"), and the value is your custom + style name. Here is an example:: + + [Formats] + textbody: mytextbody + bulletitem: mybulletitem + heading1: myheading1 + o + o + o + +3. Create a styles document that defines the styles generated by + ``odtwriter``. 
You can create and edit the styles in OOo + ``oowriter``. It may be helpful to begin by making a copy of the + styles document that is part of the ``odtwriter`` distribution + (``styles.odt``). + +4. When you run ``odtwriter``, specify the ``--odf-config-file`` + option. You might also want to specify your styles document + using the ``--stylesheet`` option in order to include your + custom style definitions. For example:: + + rst2odt.py --odf-config-file=mymappingfile.ini \ + --stylesheet=mystyles.odt mydoc.txt mydoc.odt + + +Classes +------- + +``odtwriter`` uses the following Docutils class to provide additional +control of the generation of ODF content: + +- Class ``wrap`` -- Use this to cause the wrapping of text around + an image. The default is *not* to wrap text around images. + Here is an example:: + + .. class:: wrap + .. image:: images/flower01.png + :alt: A bright yellow flower + :height: 55 + :width: 60 + + +Roles +------- + +You can use a Docutils custom interpreted text role to attach a +character style to an inline area of text. This capability also +enables you to attach a new character style (with a new name) that +you define yourself. Do this by defining your role in a stylesheet +as a character style with "rststyle-" prefixed to your role name, +then use the ``role`` directive and inline markup to apply your +role. + +In order to use this capability, do the following: + +- Define the character style for your custom role in a stylesheet + (a copy of ``styles.odt``) with the prefix "rststyle-". + Remember: (1) If the name of your custom role is "pretty", then + define a character style named "rststyle-pretty". (2) Define the + style as a *character* style, and *not*, for example as a + paragraph style. + +- Declare your role in the source reStructuredText document in a + ``role`` directive. Example:: + + .. role:: pretty + +- Use inline markup to apply your role to text. Example:: + + We have :pretty:`very nice` apples. 
+ +Here is another example:: + + .. role:: fancy + + Here is some :fancy:`pretty text` that looks fancy. + +For more on roles see: +`Custom Interpreted Text Roles -- +https://docutils.sourceforge.io/docs/ref/rst/directives.html#custom-interpreted-text-roles +<https://docutils.sourceforge.io/docs/ref/rst/directives.html#custom-interpreted-text-roles>`_. + +**Note:** The ability to base a role on another existing role is +*not* supported by ``odtwriter``. + + +Hints and Suggestions and Features +================================== + +Table of contents +----------------- + +The ``.. contents::`` directive causes ``odtwriter`` to generate +either: + +1. A static, outline style table of contents, if the + ``--generate-list-toc`` command line option is specified, or + +2. An ODF/``oowriter`` style table of contents containing + dynamically updated page numbers and with the formatting control + that ``oowriter`` gives you. This is the default, or use the + command line option ``--generate-oowriter-toc``. **Note:** + ``odtwriter`` is not able to determine page numbers, so you will + need to open the generated document in ``oowriter``, then + right-click on the table of contents and select "Update" to + insert correct page numbers. + + +Syntax highlighting +------------------- + +``odtwriter`` can add syntax highlighting to code in code +blocks. In order to activate this, do all of the following: + +1. Install `Pygments`_ and ... + +2. Use the command line option ``--add-syntax-highlighting``. + Example:: + + $ rst2odt.py --add-syntax-highlighting test.txt test.odt + +The following styles are defined in styles.odt and are used for +literal code blocks and syntax highlighting: + +- Paragraph styles: + + - rststyle-codeblock -- The style for the code block as a whole. + +- Character styles: + + - rststyle-codeblock-classname -- class names. + + - rststyle-codeblock-comment -- comments. + + - rststyle-codeblock-functionname -- function names.
+ + - rststyle-codeblock-keyword -- Python language keywords. + + - rststyle-codeblock-name -- other names, for example, + variables. + + - rststyle-codeblock-number -- literal numbers, including + integers, floats, hex numbers, and octal numbers. + + - rststyle-codeblock-operator -- Python operators. + + - rststyle-codeblock-string -- literal strings. + +Each of the above styles has a default appearance that is defined +in ``styles.odt``. To change that definition and appearance, open +``styles.odt`` in ``oowriter`` and use menu item:: + + Format --> Styles and Formatting + +Then, click on the Paragraph Styles button or the Character Styles +button at the top of the Styles and Formatting window. You may +also need to select "All Styles" from the drop-down selection list +at the bottom of the Styles and Formatting window. + + + +The container directive +----------------------- + +There is limited support for the ``container`` directive. The +limitations and rules for the container directive are the following: + +- Only the first class in the list of classes (arguments) is used. + +- That class/style must be a paragraph style and not (for example) a + character style. + +- The style/class given to the container directive will have a + "rststyle-" prefix in the odt file. + +So, for example:: + + .. container:: style-1 style-2 style-3 + + a block of text + +- Only ``style-1`` is used; ``style-2`` and ``style-3`` are ignored. + +- ``rststyle-style-1`` must be defined. It should be an existing, + predefined style, or you should define it in your stylesheet + (``styles.odt`` or the argument to the ``--stylesheet`` command + line option). + +- ``rststyle-style-1`` must be a paragraph style. + +To define a paragraph style, use the following menu item in +``oowriter``:: + + Format --> Styles and Formatting + +Then, click on the Paragraph Styles button. 
+ +The following example attaches the ``rststyle-heading2`` style (a +predefined style) to each paragraph/line in the container:: + + .. container:: heading2 + + Line 1 of container. + + Line 2 of container. + +More information on how to define a new style (for example, in your +``styles.odt``) can be found in section +`Defining and using custom style names`_. + + + +The table directive +------------------- + +The ``table`` directive can be used to add a title to a table. +Example:: + + .. table:: A little test table + + =========== ============= + Name Value + =========== ============= + Dave Cute + Mona Smart + =========== ============= + +The above will insert the title "A little test table" at the top of the +table. You can modify the appearance of the title by modifying the +paragraph style ``rststyle-table-title``. + + +Footnotes and citations +----------------------- + +Footnotes and citations are supported. + +There are additional styles ``rststyle-footnote`` and +``rststyle-citation`` for footnotes and citations. See +`Footnote and citation styles`_. + +You may need to modify the citation style to fit the length of your +citation references. + +Endnotes -- There are command line options that control whether +``odtwriter`` creates endnotes instead of footnotes. Endnotes +appear at the end of the document instead of at the bottom of the +page. See flags ``--endnotes-end-doc`` and +``--no-endnotes-end-doc`` in section `Command line options`_. + + +Images and figures +------------------ + +If on the image or the figure directive you provide the scale option +but do not provide the width and height options, then ``odtwriter`` +will attempt to determine the size of the image using the `Python +Imaging Library`_ (PIL). If ``odtwriter`` cannot find and import +Python Imaging Library, it will raise an exception. 
If this +occurs, you can fix it by doing one of the following: + +- Install the Python Imaging Library or + +- Remove the ``scale`` option or + +- Add both the ``width`` and the ``height`` options. + +So, the rule is: if on any image or figure, you specify scale but +not both width and height, you must install the `Python Imaging +Library`_. + +For more information about PIL, see: `Python Imaging Library`_. + + +The raw directive +----------------- + +The ``raw`` directive is supported. Use output format type "odt". + +You will need to be careful about the formatting of the raw +content. In particular, introduced whitespace might be a problem. + +In order to produce content for the raw directive for use by +``odtwriter``, you might want to extract the file ``content.xml`` +from a ``.odt`` file (using some Zip tool), and then clip, paste, +and modify a selected bit of it. + +Here is an example:: + + .. raw:: odt + + <text:p text:style-name="rststyle-textbody">Determining + <text:span text:style-name="rststyle-emphasis">which</text:span> + namespace a name is in is static. It can be determined by a + lexical scan of the code. If a variable is assigned a value + <text:span text:style-name="rststyle-emphasis">anywhere</text:span> + in a scope (specifically within a function or method body), + then that variable is local to that scope. If Python does + not find a variable in the local scope, then it looks next + in the global scope (also sometimes called the module scope) + and then in the built-ins scope. But, the + <text:span text:style-name="rststyle-inlineliteral">global</text:span> + statement can be used to force Python to find and use a global + variable (a variable defined at top level in a module) rather + than create a local one.</text:p> + + +The meta directive +------------------ + +``odtwriter`` supports the ``meta`` directive. "keywords" +and "description" are set in their respective odt fields. +Other meta fields are set as "Custom Properties".
+Here is an example:: + + .. meta:: + :keywords: reStructuredText, docutils, formatting + :description lang=en: A reST document, contains formatted + text in a formatted style. + :custom_var: Value + +To see the results of the ``meta`` directive in ``oowriter``, +select menu item "File/Properties...", then click on the +"Description" tab ("keywords" and "description" fields) and the +"Custom Properties" tab. + + +Footnote references inside footnotes +------------------------------------ + +Not supported. + +Get a grip. Be serious. Try a dose of reality. + +``odtwriter`` ignores them. + +They cause ``oowriter`` to croak. + + +Page size +--------- + +The default page size, in documents generated by ``odtwriter`` is +``Letter``. You can change this (for example to ``A4``) by using a +custom stylesheet. See `Defining and using a custom stylesheet`_ +for instructions on how to do this. + +On machines which support ``paperconf``, ``odtwriter`` can insert +the default page size for your locale. In order for this to work, +the following conditions must be met: + +1. The program ``paperconf`` must be available on your system. + ``odtwriter`` uses ``paperconf -s`` to obtain the paper size. + See ``man paperconf`` for more information. + +2. The default page height and width must be removed from the + ``styles.odt`` used to generate the document. A Python script + ``rst2odt_prepstyles.py`` is distributed with ``odtwriter`` and + is installed in the ``bin`` directory. You can remove the page + height and width with something like the following:: + + $ rst2odt_prepstyles.py styles.odt + +.. warning:: If you edit your stylesheet in ``oowriter`` and then + save it, ``oowriter`` automatically inserts a page height and + width in the styles for that (stylesheet) document. 
If that is + not the page size that you want and you want ``odtwriter`` to + insert a default page size using ``paperconf``, then you will + need to strip the page size from your stylesheet each time you + edit that stylesheet with ``oowriter``. + + + +Custom header/footers: inserting page numbers, date, time, etc +---------------------------------------------------------------- + +You can specify custom headers and footers for your document from +the command line. These headers and footers can be used to insert +fields such as the page number, page count, date, time, etc. See +below for a complete list. + +To insert a custom header or footer, use the "--custom-odt-header" +or "--custom-odt-footer" command line options. For example, the +following inserts a footer containing the page number and page +count:: + + $ rst2odt.py --custom-odt-footer="Page %p% of %P%" f1.txt f1.odt + + +Field specifiers +~~~~~~~~~~~~~~~~~~ + +You can use the following field specifiers to insert ``oowriter`` +fields in your custom headers and footers: + +%p% + The current page number. + +%P% + The number of pages in the document. + +%d1% + The current date in format 12/31/99. + +%d2% + The current date in format 12/31/1999. + +%d3% + The current date in format Dec 31, 1999. + +%d4% + The current date in format December 31, 1999. + +%d5% + The current date in format 1999-12-31. + +%t1% + The current time in format 14:22. + +%t2% + The current time in format 14:22:33. + +%t3% + The current time in format 02:22 PM. + +%t4% + The current time in format 02:22:33 PM. + +%a% + The author of the document (actually the initial creator). + +%t% + The document title. + +%s% + The document subject. + + +**Note:** The use of the above field specifiers in the body of your +reStructuredText document is **not** supported, because these +specifiers are not standard across Docutils writers. + + + +Credits +======= + +Stefan Merten designed and implemented the custom style names +capability. 
Thank you, Stefan. + +Michael Schutte supports the Debian GNU/Linux distribution of +``odtwriter``. Thank you, Michael, for providing and supporting +the Debian package. + +Michael Schutte implemented the fix that enables ``odtwriter`` to +pick up the default paper size on platforms where the program +``paperconf`` is available. Thank you. + + + + +.. _`Pygments`: + https://pygments.org/ + +.. _`Docutils`: + https://docutils.sourceforge.io/ + +.. _`Python Imaging Library`: + https://en.wikipedia.org/wiki/Python_Imaging_Library + +.. _`Open Document at Wikipedia`: + https://en.wikipedia.org/wiki/OpenDocument + +.. _`OASIS Open Document Format for Office Applications (OpenDocument) TC`: + http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=office diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/cheatsheet.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/cheatsheet.txt new file mode 100644 index 00000000..6db296b1 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/cheatsheet.txt @@ -0,0 +1,132 @@ +===================================================== + The reStructuredText_ Cheat Sheet: Syntax Reminders +===================================================== +:Info: See <https://docutils.sourceforge.io/rst.html> for introductory docs. +:Author: David Goodger <goodger@python.org> +:Date: $Date$ +:Revision: $Revision$ +:Description: This is a "docinfo block", or bibliographic field list + +.. NOTE:: If you are reading this as HTML, please read + `<cheatsheet.txt>`_ instead to see the input syntax examples! + +Section Structure +================= +Section titles are underlined or overlined & underlined. + +Body Elements +============= +Grid table: + ++--------------------------------+-----------------------------------+ +| Paragraphs are flush-left, | Literal block, preceded by "::":: | +| separated by blank lines. | | +| | Indented | +| Block quotes are indented. 
| | ++--------------------------------+ or:: | +| >>> print 'Doctest block' | | +| Doctest block | > Quoted | ++--------------------------------+-----------------------------------+ +| | Line blocks preserve line breaks & indents. [new in 0.3.6] | +| | Useful for addresses, verse, and adornment-free lists; long | +| lines can be wrapped with continuation lines. | ++--------------------------------------------------------------------+ + +Simple tables: + +================ ============================================================ +List Type Examples (syntax in the `text source <cheatsheet.txt>`_) +================ ============================================================ +Bullet list * items begin with "-", "+", or "*" +Enumerated list 1. items use any variation of "1.", "A)", and "(i)" + #. also auto-enumerated +Definition list Term is flush-left : optional classifier + Definition is indented, no blank line between +Field list :field name: field body +Option list -o at least 2 spaces between option & description +================ ============================================================ + +================ ============================================================ +Explicit Markup Examples (visible in the `text source`_) +================ ============================================================ +Footnote .. [1] Manually numbered or [#] auto-numbered + (even [#labelled]) or [*] auto-symbol +Citation .. [CIT2002] A citation. +Hyperlink Target .. _reStructuredText: https://docutils.sourceforge.io/rst.html + .. _indirect target: reStructuredText_ + .. _internal target: +Anonymous Target __ https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html +Directive ("::") .. image:: images/biohazard.png +Substitution Def .. |substitution| replace:: like an inline directive +Comment .. is anything else +Empty Comment (".." 
on a line by itself, with blank lines before & after, + used to separate indentation contexts) +================ ============================================================ + +Inline Markup +============= +*emphasis*; **strong emphasis**; `interpreted text`; `interpreted text +with role`:emphasis:; ``inline literal text``; standalone hyperlink, +https://docutils.sourceforge.io; named reference, reStructuredText_; +`anonymous reference`__; footnote reference, [1]_; citation reference, +[CIT2002]_; |substitution|; _`inline internal target`. + +Directive Quick Reference +========================= +See <https://docutils.sourceforge.io/docs/ref/rst/directives.html> for full info. + +================ ============================================================ +Directive Name Description (Docutils version added to, in [brackets]) +================ ============================================================ +attention Specific admonition; also "caution", "danger", + "error", "hint", "important", "note", "tip", "warning" +admonition Generic titled admonition: ``.. admonition:: By The Way`` +image ``.. image:: picture.png``; many options possible +figure Like "image", but with optional caption and legend +topic ``.. topic:: Title``; like a mini section +sidebar ``.. sidebar:: Title``; like a mini parallel document +parsed-literal A literal block with parsed inline markup +rubric ``.. rubric:: Informal Heading`` +epigraph Block quote with class="epigraph" +highlights Block quote with class="highlights" +pull-quote Block quote with class="pull-quote" +compound Compound paragraphs [0.3.6] +container Generic block-level container element [0.3.10] +table Create a titled table [0.3.1] +list-table Create a table from a uniform two-level bullet list [0.3.8] +csv-table Create a table from CSV data [0.3.4] +contents Generate a table of contents +sectnum Automatically number sections, subsections, etc. 
+header, footer Create document decorations [0.3.8] +target-notes Create an explicit footnote for each external target +math Mathematical notation (input in LaTeX format) +meta Document metadata +include Read an external reST file as if it were inline +raw Non-reST data passed untouched to the Writer +replace Replacement text for substitution definitions +unicode Unicode character code conversion for substitution defs +date Generates today's date; for substitution defs +class Set a "class" attribute on the next element +role Create a custom interpreted text role [0.3.2] +default-role Set the default interpreted text role [0.3.10] +title Set the metadata document title [0.3.10] +================ ============================================================ + +Interpreted Text Role Quick Reference +===================================== +See <https://docutils.sourceforge.io/docs/ref/rst/roles.html> for full info. + +================ ============================================================ +Role Name Description +================ ============================================================ +emphasis Equivalent to *emphasis* +literal Equivalent to ``literal`` but processes backslash escapes +math Mathematical notation (input in LaTeX format) +PEP Reference to a numbered Python Enhancement Proposal +RFC Reference to a numbered Internet Request For Comments +raw For non-reST data; cannot be used directly (see docs) [0.3.6] +strong Equivalent to **strong** +sub Subscript +sup Superscript +title Title reference (book, etc.); standard default role +================ ============================================================ diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/demo.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/demo.txt new file mode 100644 index 00000000..e61426eb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/demo.txt @@ -0,0 +1,567 @@ +.. This is a comment. 
Note how any initial comments are moved by + transforms to after the document title, subtitle, and docinfo. + +================================ + reStructuredText Demonstration +================================ + +.. Above is the document title, and below is the subtitle. + They are transformed from section titles after parsing. + +-------------------------------- + Examples of Syntax Constructs +-------------------------------- + +.. bibliographic fields (which also require a transform): + +:Author: David Goodger +:Address: 123 Example Street + Example, EX Canada + A1B 2C3 +:Contact: docutils-develop@lists.sourceforge.net +:Authors: Me; Myself; I +:organization: humankind +:date: $Date$ +:status: This is a "work in progress" +:revision: $Revision$ +:version: 1 +:copyright: This document has been placed in the public domain. You + may do with it as you wish. You may copy, modify, + redistribute, reattribute, sell, buy, rent, lease, + destroy, or improve it, quote it at length, excerpt, + incorporate, collate, fold, staple, or mutilate it, or do + anything else to it that your or anyone else's heart + desires. +:field name: This is a generic bibliographic field. +:field name 2: + Generic bibliographic fields may contain multiple body elements. + + Like this. + +:Dedication: + + For Docutils users & co-developers. + +:abstract: + + This document is a demonstration of the reStructuredText markup + language, containing examples of all basic reStructuredText + constructs and many advanced constructs. + +.. meta:: + :keywords: reStructuredText, demonstration, demo, parser + :description lang=en: A demonstration of the reStructuredText + markup language, containing examples of all basic + constructs and many advanced constructs. + +.. contents:: Table of Contents +.. section-numbering:: + + +Structural Elements +=================== + +Section Title +------------- + +That's it, the text just above this line. 
+ +Transitions +----------- + +Here's a transition: + +--------- + +It divides the section. + +Body Elements +============= + +Paragraphs +---------- + +A paragraph. + +Inline Markup +````````````` + +Paragraphs contain text and may contain inline markup: *emphasis*, +**strong emphasis**, ``inline literals``, standalone hyperlinks +(https://www.python.org), external hyperlinks (Python_), internal +cross-references (example_), external hyperlinks with embedded URIs +(`Python web site <https://www.python.org>`__), footnote references +(manually numbered [1]_, anonymous auto-numbered [#]_, labeled +auto-numbered [#label]_, or symbolic [*]_), citation references +([CIT2002]_), substitution references (|example|), and _`inline +hyperlink targets` (see Targets_ below for a reference back to here). +Character-level inline markup is also possible (although exceedingly +ugly!) in *re*\ ``Structured``\ *Text*. Problems are indicated by +|problematic| text (generated by processing errors; this one is +intentional). + +The default role for interpreted text is `Title Reference`. Here are +some explicit interpreted text roles: a PEP reference (:PEP:`287`); an +RFC reference (:RFC:`2822`); a :sub:`subscript`; a :sup:`superscript`; +and explicit roles for :emphasis:`standard` :strong:`inline` +:literal:`markup`. + +.. DO NOT RE-WRAP THE FOLLOWING PARAGRAPH! + +Let's test wrapping and whitespace significance in inline literals: +``This is an example of --inline-literal --text, --including some-- +strangely--hyphenated-words. Adjust-the-width-of-your-browser-window +to see how the text is wrapped. -- ---- -------- Now note the +spacing between the words of this sentence (words +should be grouped in pairs).`` + +If the ``--pep-references`` option was supplied, there should be a +live link to PEP 258 here. + +Bullet Lists +------------ + +- A bullet list + + + Nested bullet list. + + Nested item 2. + +- Item 2. + + Paragraph 2 of item 2. + + * Nested bullet list. + * Nested item 2. 
+ + - Third level. + - Item 2. + + * Nested item 3. + +Enumerated Lists +---------------- + +1. Arabic numerals. + + a) lower alpha) + + (i) (lower roman) + + A. upper alpha. + + I) upper roman) + +2. Lists that don't start at 1: + + 3. Three + + 4. Four + + C. C + + D. D + + iii. iii + + iv. iv + +#. List items may also be auto-enumerated. + +Definition Lists +---------------- + +Term + Definition +Term : classifier + Definition paragraph 1. + + Definition paragraph 2. +Term + Definition + +Field Lists +----------- + +:what: Field lists map field names to field bodies, like database + records. They are often part of an extension syntax. They are + an unambiguous variant of RFC 2822 fields. + +:how arg1 arg2: + + The field marker is a colon, the field name, and a colon. + + The field body may contain one or more body elements, indented + relative to the field marker. + +Option Lists +------------ + +For listing command-line options: + +-a command-line option "a" +-b file options can have arguments + and long descriptions +--long options can be long also +--input=file long options can also have + arguments + +--very-long-option + The description can also start on the next line. + + The description may contain multiple body elements, + regardless of where it starts. + +-x, -y, -z Multiple options are an "option group". +-v, --verbose Commonly-seen: short & long options. +-1 file, --one=file, --two file + Multiple options with arguments. +/V DOS/VMS-style options too + +There must be at least two spaces between the option and the +description. + +Literal Blocks +-------------- + +Literal blocks are indicated with a double-colon ("::") at the end of +the preceding paragraph (over there ``-->``). They can be indented:: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + +Or they can be quoted without indentation:: + +>> Great idea! +> +> Why didn't I think of that? 
+ +Line Blocks +----------- + +| This is a line block. It ends with a blank line. +| Each new line begins with a vertical bar ("|"). +| Line breaks and initial indents are preserved. +| Continuation lines are wrapped portions of long lines; + they begin with a space in place of the vertical bar. +| The left edge of a continuation line need not be aligned with + the left edge of the text above it. + +| This is a second line block. +| +| Blank lines are permitted internally, but they must begin with a "|". + +Take it away, Eric the Orchestra Leader! + + | A one, two, a one two three four + | + | Half a bee, philosophically, + | must, *ipso facto*, half not be. + | But half the bee has got to be, + | *vis a vis* its entity. D'you see? + | + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + | + | Singing... + +Block Quotes +------------ + +Block quotes consist of indented body elements: + + My theory by A. Elk. Brackets Miss, brackets. This theory goes + as follows and begins now. All brontosauruses are thin at one + end, much much thicker in the middle and then thin again at the + far end. That is my theory, it is mine, and belongs to me and I + own it, and what it is too. 
+ + -- Anne Elk (Miss) + +Doctest Blocks +-------------- + +>>> print 'Python-specific usage examples; begun with ">>>"' +Python-specific usage examples; begun with ">>>" +>>> print '(cut and pasted from interactive Python sessions)' +(cut and pasted from interactive Python sessions) + +Tables +------ + +Here's a grid table followed by a simple table: + ++------------------------+------------+----------+----------+ +| Header row, column 1 | Header 2 | Header 3 | Header 4 | +| (header rows optional) | | | | ++========================+============+==========+==========+ +| body row 1, column 1 | column 2 | column 3 | column 4 | ++------------------------+------------+----------+----------+ +| body row 2 | Cells may span columns. | ++------------------------+------------+---------------------+ +| body row 3 | Cells may | - Table cells | ++------------------------+ span rows. | - contain | +| body row 4 | | - body elements. | ++------------------------+------------+----------+----------+ +| body row 5 | Cells may also be | | +| | empty: ``-->`` | | ++------------------------+-----------------------+----------+ + +===== ===== ====== + Inputs Output +------------ ------ + A B A or B +===== ===== ====== +False False False +True False True +False True True +True True True +===== ===== ====== + +Footnotes +--------- + +.. [1] A footnote contains body elements, consistently indented by at + least 3 spaces. + + This is the footnote's second paragraph. + +.. [#label] Footnotes may be numbered, either manually (as in [1]_) or + automatically using a "#"-prefixed label. This footnote has a + label so it can be referred to from multiple places, both as a + footnote reference ([#label]_) and as a hyperlink reference + (label_). + +.. [#] This footnote is numbered automatically and anonymously using a + label of "#" only. + +.. [*] Footnotes may also use symbols, specified with a "*" label. + Here's a reference to the next footnote: [*]_. + +.. 
[*] This footnote shows the next symbol in the sequence. + +.. [4] Here's an unreferenced footnote, with a reference to a + nonexistent footnote: [5]_. + +Citations +--------- + +.. [CIT2002] Citations are text-labeled footnotes. They may be + rendered separately and differently from footnotes. + +Here's a reference to the above, [CIT2002]_, and a [nonexistent]_ +citation. + +Targets +------- + +.. _example: + +This paragraph is pointed to by the explicit "example" target. A +reference can be found under `Inline Markup`_, above. `Inline +hyperlink targets`_ are also possible. + +Section headers are implicit targets, referred to by name. See +Targets_, which is a subsection of `Body Elements`_. + +Explicit external targets are interpolated into references such as +"Python_". + +.. _Python: https://www.python.org + +Targets may be indirect and anonymous. Thus `this phrase`__ may also +refer to the Targets_ section. + +__ Targets_ + +Here's a `hyperlink reference without a target`_, which generates an +error. + +Duplicate Target Names +`````````````````````` + +Duplicate names in section headers or other implicit targets will +generate "info" (level-1) system messages. Duplicate names in +explicit targets will generate "warning" (level-2) system messages. + +Duplicate Target Names +`````````````````````` + +Since there are two "Duplicate Target Names" section headers, we +cannot uniquely refer to either of them by name. If we try to (like +this: `Duplicate Target Names`_), an error is generated. + +Directives +---------- + +.. contents:: :local: + +These are just a sample of the many reStructuredText Directives. For +others, please see +https://docutils.sourceforge.io/docs/ref/rst/directives.html. + +Document Parts +`````````````` + +An example of the "contents" directive can be seen above this section +(a local, untitled table of contents_) and at the beginning of the +document (a document-wide `table of contents`_). 
+ +Images +`````` + +An image directive (also clickable -- a hyperlink reference): + +.. image:: images/title.png + :target: directives_ + +A figure directive: + +.. figure:: images/title.png + :alt: reStructuredText, the markup syntax + + A figure is an image with a caption and/or a legend: + + +------------+-----------------------------------------------+ + | re | Revised, revisited, based on 're' module. | + +------------+-----------------------------------------------+ + | Structured | Structure-enhanced text, structuredtext. | + +------------+-----------------------------------------------+ + | Text | Well it is, isn't it? | + +------------+-----------------------------------------------+ + + This paragraph is also part of the legend. + +Admonitions +``````````` + +.. Attention:: Directives at large. + +.. Caution:: + + Don't take any wooden nickels. + +.. DANGER:: Mad scientist at work! + +.. Error:: Does not compute. + +.. Hint:: It's bigger than a bread box. + +.. Important:: + - Wash behind your ears. + - Clean up your room. + - Call your mother. + - Back up your data. + +.. Note:: This is a note. + +.. Tip:: 15% if the service is good. + +.. WARNING:: Strong prose may provoke extreme mental exertion. + Reader discretion is strongly advised. + +.. admonition:: And, by the way... + + You can make up your own admonition too. + +Topics, Sidebars, and Rubrics +````````````````````````````` + +.. sidebar:: Optional Sidebar Title + :subtitle: Optional Subtitle + + This is a sidebar. It is for text outside the flow of the main + text. + + .. rubric:: This is a rubric inside a sidebar + + Sidebars often appear beside the main text with a border and + background color. + +.. topic:: Topic Title + + This is a topic. + +.. rubric:: This is a rubric + +Target Footnotes +```````````````` + +.. target-notes:: + +Replacement Text +```````````````` + +I recommend you try |Python|_. + +.. 
|Python| replace:: Python, *the* best language around + +Compound Paragraph +`````````````````` + +.. compound:: + + This paragraph contains a literal block:: + + Connecting... OK + Transmitting data... OK + Disconnecting... OK + + and thus consists of a simple paragraph, a literal block, and + another simple paragraph. Nonetheless it is semantically *one* + paragraph. + +This construct is called a *compound paragraph* and can be produced +with the "compound" directive. + +Meta +```` + +The `“meta” directive`__ is used to specify metadata to be stored in, +e.g., HTML META__ tags or ODT file properties. + +.. meta:: + :keywords: reStructuredText, test, parser + :description lang=en: A test document, containing at least one + example of each reStructuredText construct. + +__ https://docutils.sourceforge.io/docs/ref/rst/directives.html#metadata +__ https://developer.mozilla.org/en-US/docs/Web/HTML/Viewport_meta_tag + + +Substitution Definitions +------------------------ + +An inline image (|example|) example: + +.. |EXAMPLE| image:: images/biohazard.png + +(Substitution definitions are not visible in the HTML source.) + +Comments +-------- + +Here's one: + +.. Comments begin with two dots and a space. Anything may + follow, except for the syntax of footnotes, hyperlink + targets, directives, or substitution definitions. + + Double-dashes -- "--" -- must be escaped somehow in HTML output. + +(View the HTML source to see the comment.) + +Error Handling +============== + +Any errors caught during processing will generate system messages. + +|*** Expect 6 errors (including this one). ***| + +There should be six messages in the following, auto-generated +section, "Docutils System Messages": + +.. 
section should be added by Docutils automatically diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-bitmap-scaling.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-bitmap-scaling.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-bitmap.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-bitmap.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-scaling.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard-scaling.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.swf b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/biohazard.swf new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/pens.mp4 b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/pens.mp4 new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title-scaling.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title-scaling.svg new file mode 100644 index 
00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title.png b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title.png new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title.svg b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/images/title.svg new file mode 100644 index 00000000..e69de29b diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/quickstart.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/quickstart.txt new file mode 100644 index 00000000..8b62afcb --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/rst/quickstart.txt @@ -0,0 +1,404 @@ +A ReStructuredText Primer +========================= + +:Author: Richard Jones +:Version: $Revision$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + + +The text below contains links that look like "(quickref__)". These +are relative links that point to the `Quick reStructuredText`_ user +reference. If these links don't work, please refer to the `master +quick reference`_ document. + +__ +.. _Quick reStructuredText: quickref.html +.. _master quick reference: + https://docutils.sourceforge.io/docs/user/rst/quickref.html + +.. Note:: This document is an informal introduction to + reStructuredText. The `What Next?`_ section below has links to + further resources, including a formal reference. + + +Structure +--------- + +From the outset, let me say that "Structured Text" is probably a bit +of a misnomer. It's more like "Relaxed Text" that uses certain +consistent patterns. These patterns are interpreted by a HTML +converter to produce "Very Structured Text" that can be used by a web +browser. + +The most basic pattern recognised is a **paragraph** (quickref__). 
+That's a chunk of text that is separated by blank lines (one is +enough). Paragraphs must have the same indentation -- that is, line +up at their left edge. Paragraphs that start indented will result in +indented quote paragraphs. For example:: + + This is a paragraph. It's quite + short. + + This paragraph will result in an indented block of + text, typically used for quoting other text. + + This is another one. + +Results in: + + This is a paragraph. It's quite + short. + + This paragraph will result in an indented block of + text, typically used for quoting other text. + + This is another one. + +__ quickref.html#paragraphs + + +Text styles +----------- + +(quickref__) + +__ quickref.html#inline-markup + +Inside paragraphs and other bodies of text, you may additionally mark +text for *italics* with "``*italics*``" or **bold** with +"``**bold**``". This is called "inline markup". + +If you want something to appear as a fixed-space literal, use +"````double back-quotes````". Note that no further fiddling is done +inside the double back-quotes -- so asterisks "``*``" etc. are left +alone. + +If you find that you want to use one of the "special" characters in +text, it will generally be OK -- reStructuredText is pretty smart. +For example, this lone asterisk * is handled just fine, as is the +asterisk in this equation: 5*6=30. If you actually +want text \*surrounded by asterisks* to **not** be italicised, then +you need to indicate that the asterisk is not special. You do this by +placing a backslash just before it, like so "``\*``" (quickref__), or +by enclosing it in double back-quotes (inline literals), like this:: + + ``*`` + +__ quickref.html#escaping + +.. Tip:: Think of inline markup as a form of (parentheses) and use it + the same way: immediately before and after the text being marked + up. Inline markup by itself (surrounded by whitespace) or in the + middle of a word won't be recognized. See the `markup spec`__ for + full details. 
+ +__ ../../ref/rst/restructuredtext.html#inline-markup + + +Lists +----- + +Lists of items come in three main flavours: **enumerated**, +**bulleted** and **definitions**. In all list cases, you may have as +many paragraphs, sublists, etc. as you want, as long as the left-hand +side of the paragraph or whatever aligns with the first line of text +in the list item. + +Lists must always start a new paragraph -- that is, they must appear +after a blank line. + +**enumerated** lists (numbers, letters or roman numerals; quickref__) + __ quickref.html#enumerated-lists + + Start a line off with a number or letter followed by a period ".", + right bracket ")" or surrounded by brackets "( )" -- whatever you're + comfortable with. All of the following forms are recognised:: + + 1. numbers + + A. upper-case letters + and it goes over many lines + + with two paragraphs and all! + + a. lower-case letters + + 3. with a sub-list starting at a different number + 4. make sure the numbers are in the correct sequence though! + + I. upper-case roman numerals + + i. lower-case roman numerals + + (1) numbers again + + 1) and again + + Results in (note: the different enumerated list styles are not + always supported by every web browser, so you may not get the full + effect here): + + 1. numbers + + A. upper-case letters + and it goes over many lines + + with two paragraphs and all! + + a. lower-case letters + + 3. with a sub-list starting at a different number + 4. make sure the numbers are in the correct sequence though! + + I. upper-case roman numerals + + i. 
lower-case roman numerals + + (1) numbers again + + 1) and again + +**bulleted** lists (quickref__) + __ quickref.html#bullet-lists + + Just like enumerated lists, start the line off with a bullet point + character - either "-", "+" or "*":: + + * a bullet point using "*" + + - a sub-list using "-" + + + yet another sub-list + + - another item + + Results in: + + * a bullet point using "*" + + - a sub-list using "-" + + + yet another sub-list + + - another item + +**definition** lists (quickref__) + __ quickref.html#definition-lists + + Unlike the other two, the definition lists consist of a term, and + the definition of that term. The format of a definition list is:: + + what + Definition lists associate a term with a definition. + + *how* + The term is a one-line phrase, and the definition is one or more + paragraphs or body elements, indented relative to the term. + Blank lines are not allowed between term and definition. + + Results in: + + what + Definition lists associate a term with a definition. + + *how* + The term is a one-line phrase, and the definition is one or more + paragraphs or body elements, indented relative to the term. + Blank lines are not allowed between term and definition. + + +Preformatting (code samples) +---------------------------- +(quickref__) + +__ quickref.html#literal-blocks + +To just include a chunk of preformatted, never-to-be-fiddled-with +text, finish the prior paragraph with "``::``". The preformatted +block is finished when the text falls back to the same indentation +level as a paragraph prior to the preformatted block. For example:: + + An example:: + + Whitespace, newlines, blank lines, and all kinds of markup + (like *this* or \this) is preserved by literal blocks. + Lookie here, I've dropped an indentation level + (but not far enough) + + no more example + +Results in: + + An example:: + + Whitespace, newlines, blank lines, and all kinds of markup + (like *this* or \this) is preserved by literal blocks. 
+ Lookie here, I've dropped an indentation level + (but not far enough) + + no more example + +Note that if a paragraph consists only of "``::``", then it's removed +from the output:: + + :: + + This is preformatted text, and the + last "::" paragraph is removed + +Results in: + +:: + + This is preformatted text, and the + last "::" paragraph is removed + + +Sections +-------- + +(quickref__) + +__ quickref.html#section-structure + +To break longer text up into sections, you use **section headers**. +These are a single line of text (one or more words) with adornment: an +underline alone, or an underline and an overline together, in dashes +"``-----``", equals "``======``", tildes "``~~~~~~``" or any of the +non-alphanumeric characters ``= - ` : ' " ~ ^ _ * + # < >`` that you +feel comfortable with. An underline-only adornment is distinct from +an overline-and-underline adornment using the same character. The +underline/overline must be at least as long as the title text. Be +consistent, since all sections marked with the same adornment style +are deemed to be at the same level:: + + Chapter 1 Title + =============== + + Section 1.1 Title + ----------------- + + Subsection 1.1.1 Title + ~~~~~~~~~~~~~~~~~~~~~~ + + Section 1.2 Title + ----------------- + + Chapter 2 Title + =============== + +This results in the following structure, illustrated by simplified +pseudo-XML:: + + <section> + <title> + Chapter 1 Title + <section> + <title> + Section 1.1 Title + <section> + <title> + Subsection 1.1.1 Title + <section> + <title> + Section 1.2 Title + <section> + <title> + Chapter 2 Title + +(Pseudo-XML uses indentation for nesting and has no end-tags. It's +not possible to show actual processed output, as in the other +examples, because sections cannot exist inside block quotes. For a +concrete example, compare the section structure of this document's +source text and processed output.) + +Note that section headers are available as link targets, just using +their name. 
To link to the Lists_ heading, I write "``Lists_``". If +the heading has a space in it like `text styles`_, we need to quote +the heading "```text styles`_``". + + +Document Title / Subtitle +````````````````````````` + +The title of the whole document is distinct from section titles and +may be formatted somewhat differently (e.g. the HTML writer by default +shows it as a centered heading). + +To indicate the document title in reStructuredText, use a unique adornment +style at the beginning of the document. To indicate the document subtitle, +use another unique adornment style immediately after the document title. For +example:: + + ================ + Document Title + ================ + ---------- + Subtitle + ---------- + + Section Title + ============= + + ... + +Note that "Document Title" and "Section Title" above both use equals +signs, but are distinct and unrelated styles. The text of +overline-and-underlined titles (but not underlined-only) may be inset +for aesthetics. + + +Images +------ + +(quickref__) + +__ quickref.html#directives + +To include an image in your document, you use the ``image`` directive__. +For example:: + + .. image:: images/biohazard.png + +results in: + +.. image:: images/biohazard.png + +The ``images/biohazard.png`` part indicates the filename of the image +you wish to appear in the document. There's no restriction placed on +the image (format, size etc). If the image is to appear in HTML and +you wish to supply additional information, you may:: + + .. image:: images/biohazard.png + :height: 100 + :width: 200 + :scale: 50 + :alt: alternate text + +See the full `image directive documentation`__ for more info. + +__ ../../ref/rst/directives.html +__ ../../ref/rst/directives.html#images + + +What Next? +---------- + +This primer introduces the most common features of reStructuredText, +but there are a lot more to explore. The `Quick reStructuredText`_ +user reference is a good place to go next. 
For complete details, the +`reStructuredText Markup Specification`_ is the place to go [#]_. + +Users who have questions or need assistance with Docutils or +reStructuredText should post a message to the Docutils-users_ mailing +list. + +.. [#] If that relative link doesn't work, try the master document: + https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html. + +.. _reStructuredText Markup Specification: + ../../ref/rst/restructuredtext.html +.. _Docutils-users: ../mailing-lists.html#docutils-users +.. _Docutils project web site: https://docutils.sourceforge.io/ diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/smartquotes.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/smartquotes.txt new file mode 100644 index 00000000..97536639 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/smartquotes.txt @@ -0,0 +1,483 @@ +========================= +Smart Quotes for Docutils +========================= + +:Author: Günter Milde, + based on SmartyPants by John Gruber, Brad Choate, and Chad Miller +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:License: Released under the terms of the `2-Clause BSD license`_ +:Abstract: This document describes the Docutils `smartquotes` module. + +.. _2-Clause BSD license: http://opensource.org/licenses/BSD-2-Clause + +.. contents:: + +Description +=========== + +The `"smart_quotes" configuration setting`_ triggers the SmartQuotes +transformation on Text nodes that includes the following steps: + +- Straight quotes (``"`` and ``'``) into "curly" quote characters +- dashes (``--`` and ``---``) into en- and em-dash entities +- three consecutive dots (``...`` or ``. . .``) into an ellipsis entity. + +This means you can write, edit, and save your documents using plain old +ASCII -- straight quotes, plain dashes, and plain dots -- while Docutils +generates documents with typographical quotes, dashes, and ellipses. 
+ +Advantages: + +* Typing speed (especially when blind-typing). +* The possibility to change the quoting style of the + complete document with just one configuration option. +* Typographical quotes with just 7-bit ASCII characters in the source. + +However, there are `algorithmic shortcomings`_ for 2 reasons: + +* Dual use of the "ASCII-apostrophe" (') as single quote and apostrophe. +* Languages that do not use whitespace around words. + +So, please consider also +`Why You Might Not Want to Use "Smart" Quotes in Your Documents`_. + +.. _"smart_quotes" configuration setting: +.. _"smart_quotes" setting: config.html#smart-quotes + + +Escaping +======== + +The `SmartQuotes` transform does not modify characters in literal text +such as source code, maths, or literal blocks. + +If you need literal straight quotes (or plain hyphens and periods) in normal +text, you can `backslash escape`_ the characters to preserve +ASCII-punctuation. + +.. class:: booktabs + +========= ========= == ======== ========== +Input Output Input Output +========= ========= == ======== ========== +``\\`` \\ ``\...`` \... +``\"`` \" ``\--`` \-- +``\'`` \' ``\``` \` +========= ========= == ======== ========== + +This is useful, for example, when you want to use straight quotes as +foot and inch marks: + + 6\'2\" tall; a 17\" monitor. + +.. _backslash escape: ../ref/rst/restructuredtext.html#escaping-mechanism + + +Localization +============ + +Quotation marks have a `variety of forms`__ in different languages and +media. + +__ https://en.wikipedia.org/wiki/Quotation_mark#Summary_table + +`SmartQuotes` inserts quotation marks depending on the language of the +current block element and the value of the `"smart_quotes" setting`_.\ +[#x-altquot]_ +There is built-in support for the following languages:\ [#smartquotes-locales]_ + +.. class:: run-in + +:af: .. class:: language-af + + "'Afrikaans' quotes" + +:af-x-altquot: .. class:: language-af-x-altquot + + "'Afrikaans' alternative quotes" + +:ca: .. 
class:: language-ca + + "'Catalan' quotes" + +:ca-x-altquot: .. class:: language-ca-x-altquot + + "'Catalan' alternative quotes" + +:cs: .. class:: language-cs + + "'Czech' quotes" + +:cs-x-altquot: .. class:: language-cs-x-altquot + + "'Czech' alternative quotes" + +:da: .. class:: language-da + + "'Danish' quotes" + +:da-x-altquot: .. class:: language-da-x-altquot + + "'Danish' alternative quotes" + +:de: .. class:: language-de + + "'German' quotes" + +:de-x-altquot: .. class:: language-de-x-altquot + + "'German' alternative quotes" + +:de-ch: .. class:: language-de-ch + + "'Swiss-German' quotes" + +:el: .. class:: language-el + + "'Greek' quotes" + +:en: .. class:: language-en + + "'English' quotes" + +:en-uk-x-altquot: .. class:: language-en-uk-x-altquot + + "'British' alternative quotes" (swaps single and double quotes) + +:eo: .. class:: language-eo + + "'Esperanto' quotes" + +:es: .. class:: language-es + + "'Spanish' quotes" + +:es-x-altquot: .. class:: language-es-x-altquot + + "'Spanish' alternative quotes" + +:et: .. class:: language-et + + "'Estonian' quotes" (no secondary quote listed in Wikipedia) + +:et-x-altquot: .. class:: language-et-x-altquot + + "'Estonian' alternative quotes" + +:eu: .. class:: language-eu + + "'Basque' quotes" + +:fi: .. class:: language-fi + + "'Finnish' quotes" + +:fi-x-altquot: .. class:: language-fi-x-altquot + + "'Finnish' alternative quotes" + +:fr: .. class:: language-fr + + "'French' quotes" + +:fr-x-altquot: .. class:: language-fr-x-altquot + + "'French' alternative quotes" + +:fr-ch: .. class:: language-fr-ch + + "'Swiss-French' quotes" + +:fr-ch-x-altquot: .. class:: language-fr-ch-x-altquot + + "'Swiss-French' alternative quotes" (narrow no-break space, see + http://typoguide.ch/) + +:gl: .. class:: language-gl + + "'Galician' quotes" + +:he: .. class:: language-he + + "'Hebrew' quotes" + +:he-x-altquot: .. class:: language-he-x-altquot + + "'Hebrew' alternative quotes" + +:hr: .. 
class:: language-hr + + "'Croatian' quotes" + +:hr-x-altquot: .. class:: language-hr-x-altquot + + "'Croatian' alternative quotes" + +:hsb: .. class:: language-hsb + + "'Upper Sorbian' quotes" + +:hsb-x-altquot: .. class:: language-hsb-x-altquot + + "'Upper Sorbian' alternative quotes" + +:hu: .. class:: language-hu + + "'Hungarian' quotes" + +:is: .. class:: language-is + + "'Icelandic' quotes" + +:it: .. class:: language-it + + "'Italian' quotes" + +:it-ch: .. class:: language-it-ch + + "'Swiss-Italian' quotes" + +:it-x-altquot: .. class:: language-it-x-altquot + + "'Italian' alternative quotes" + +:ja: .. class:: language-ja + + "'Japanese' quotes" + +:lt: .. class:: language-lt + + "'Lithuanian' quotes" + +:lv: .. class:: language-lv + + "'Latvian' quotes" + +:nl: .. class:: language-nl + + "'Dutch' quotes" + +:nl-x-altquot: .. class:: language-nl-x-altquot + + "'Dutch' alternative quotes" + + .. # 'nl-x-altquot2': '””’’', + +:pl: .. class:: language-pl + + "'Polish' quotes" + +:pl-x-altquot: .. class:: language-pl-x-altquot + + "'Polish' alternative quotes" + +:pt: .. class:: language-pt + + "'Portuguese' quotes" + +:pt-br: .. class:: language-pt-br + + "'Portuguese (Brazil)' quotes" + +:ro: .. class:: language-ro + + "'Romanian' quotes" + +:ru: .. class:: language-ru + + "'Russian' quotes" + +:sh: .. class:: language-sh + + "'Serbo-Croatian' quotes" + +:sh-x-altquot: .. class:: language-sh-x-altquot + + "'Serbo-Croatian' alternative quotes" + +:sk: .. class:: language-sk + + "'Slovak' quotes" + +:sk-x-altquot: .. class:: language-sk-x-altquot + + "'Slovak' alternative quotes" + +:sl: .. class:: language-sl + + "'Slovenian' quotes" + +:sl-x-altquot: .. class:: language-sl-x-altquot + + "'Slovenian' alternative quotes" + +:sr: .. class:: language-sr + + "'Serbian' quotes" + +:sr-x-altquot: .. class:: language-sr-x-altquot + + "'Serbian' alternative quotes" + +:sv: .. class:: language-sv + + "'Swedish' quotes" + +:sv-x-altquot: .. 
class:: language-sv-x-altquot + + "'Swedish' alternative quotes" + +:tr: .. class:: language-tr + + "'Turkish' quotes" + +:tr-x-altquot: .. class:: language-tr-x-altquot + + "'Turkish' alternative quotes" + +.. 'tr-x-altquot2': '“„‘‚', # antiquated? + +:uk: .. class:: language-uk + + "'Ukrainian' quotes" + +:uk-x-altquot: .. class:: language-uk-x-altquot + + "'Ukrainian' alternative quotes" + +:zh-cn: .. class:: language-zh-cn + + "'Chinese (China)' quotes" + +:zh-tw: .. class:: language-zh-tw + + "'Chinese (Taiwan)' quotes" + +Quotes in text blocks in a non-configured language are kept as plain quotes: + +:undefined: .. class:: language-undefined-example + + "'Undefined' quotes" + +.. [#x-altquot] Tags with the non-standard extension ``-x-altquot`` define + the quote set used with the `"smart_quotes" setting`_ value ``"alt"``. + +.. [#smartquotes-locales] The definitions for language-dependent + typographical quotes can be extended or overwritten using the + `"smartquotes_locales" setting`_. + + The following example ensures a correct leading apostrophe in ``'s + Gravenhage`` (at the cost of incorrect leading single quotes) in Dutch + and sets French quotes to double and single guillemets with inner + spacing:: + + smartquotes-locales: nl: „”’’ + fr: « : »:‹ : › + +.. _"smartquotes_locales" setting: config.html#smartquotes-locales + + +Caveats +======= + +Why You Might Not Want to Use "Smart" Quotes in Your Documents +-------------------------------------------------------------- + +For one thing, you might not care. + +Most normal, mentally stable individuals do not take notice of proper +typographic punctuation. Many design and typography nerds, however, break +out in a nasty rash when they encounter, say, a restaurant sign that uses +a straight apostrophe to spell "Joe's". + +If you're the sort of person who just doesn't care, you might well want to +continue not caring.
Using straight quotes -- and sticking to the 7-bit +ASCII character set in general -- is certainly a simpler way to live. + +Even if you *do* care about accurate typography, you still might want to +think twice before "auto-educating" the quote characters in your documents. +As there is always a chance that the algorithm gets it wrong, you may +instead prefer to use the compose key or some other means to insert the +correct Unicode characters into the source. + + +Algorithmic Shortcomings +------------------------ + +The ASCII character (u0027 APOSTROPHE) is used for apostrophe and single +quotes. If used inside a word, it is converted into an apostrophe: + + .. class:: language-fr + + Il dit : "C'est 'super' !" + +At the beginning or end of a word, it cannot be distinguished from a single +quote by the algorithm. + +The `right single quotation mark`_ character -- used to close a secondary +(inner) quote in English -- is also "the preferred character to use for +apostrophe" (Unicode_). Therefore, "educating" works as expected for +apostrophes at the end of a word, e.g., + + Mr. Hastings' pen; three days' leave; my two cents' worth. + +However, when apostrophes are used at the start of leading contractions, +"educating" will turn the apostrophe into an *opening* secondary quote. In +English, this is *not* the apostrophe character, e.g., ``'Twas brillig`` +is "miseducated" to + + 'Twas brillig. + +In other locales (French, Italian, German, ...), secondary closing quotes +differ from the apostrophe. A text like:: + + .. class:: language-de-CH + + "Er sagt: 'Ich fass' es nicht.'" + +becomes + + «Er sagt: ‹Ich fass› es nicht.›» + +with a single closing guillemet in place of the apostrophe. + +In such cases, it's best to use the recommended apostrophe character (’) in +the source: + + | ’Twas brillig, and the slithy toves + | Did gyre and gimble in the wabe; + | All mimsy were the borogoves, + | And the mome raths outgrabe. + +.. 
_right single quotation mark: + http://www.fileformat.info/info/unicode/char/2019/index.htm +.. _Unicode: https://www.unicode.org/charts/PDF/U2000.pdf + +History +======= + +The smartquotes module is an adaption of "SmartyPants_" to Docutils. + +`John Gruber`_ did all of the hard work of writing this software in Perl for +`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_ +ported it to Python to use with Pyblosxom_. + +Portions of the SmartyPants original work are based on Brad Choate's nifty +MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to +this plug-in. Brad Choate is a fine hacker indeed. +`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta +testing of the original SmartyPants. + +Internationalization and adaption to Docutils by Günter Milde. + +.. _SmartyPants: http://daringfireball.net/projects/smartypants/ +.. _Pyblosxom: http://pyblosxom.bluesock.org/ +.. _Movable Type: http://www.movabletype.org/ +.. _John Gruber: http://daringfireball.net/ +.. _Chad Miller: http://web.chad.org/ +.. _Brad Choate: http://bradchoate.com/ +.. _Jeremy Hedley: http://antipixel.com/ +.. _Charles Wiltgen: http://playbacktime.com/ +.. _Rael Dornfest: http://raelity.org/ diff --git a/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/tools.txt b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/tools.txt new file mode 100644 index 00000000..8c58e6e1 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/data/docs/user/tools.txt @@ -0,0 +1,569 @@ +========================== + Docutils Front-End Tools +========================== + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. 
contents:: + + +-------------- + Introduction +-------------- + +Once the Docutils package is unpacked, you will discover a ``tools/`` +directory containing several front ends for common Docutils +processing. +In addition to the `generic command line front end`_, Docutils has +many small front ends, each specialized for a specific "Reader" (which +knows how to interpret a file in context), a "Parser" (which +understands the syntax of the text), and a "Writer" (which knows how +to generate a specific data format). + +Most [#]_ front ends have common options and the same command-line usage +pattern (see `the tools`_ below for concrete examples):: + + toolname [options] [<source> [<destination>]] + +Each tool has a "``--help``" option which lists the +`command-line options`_ and arguments it supports. +Processing can also be customized with `configuration files`_. + +The two arguments, "source" and "destination", are optional. If only +one argument (source) is specified, the standard output (stdout) is +used for the destination. If no arguments are specified, the standard +input (stdin) is used for the source. + +.. note:: + Docutils front-end tool support is currently under discussion. + Tool names, install details and the set of auto-installed tools + may change in future Docutils versions. + +.. [#] The exceptions are buildhtml.py_ and rst2odt_prepstyles.py_. + +Getting Help +============ + +First, try the "``--help``" option each front-end tool has. + +Command line options and their corresponding configuration file entries +are detailed in `Docutils Configuration`_. + +Users who have questions or need assistance with Docutils or +reStructuredText should post a message to the Docutils-users_ mailing +list. + +.. 
_Docutils-users: mailing-lists.html#docutils-users + + +----------- + The Tools +----------- + +Generic Command Line Front End +============================== + +:Readers: Standalone, PEP +:Parsers: reStructuredText, Markdown (requires 3rd party packages) +:Writers: html_, html4css1_, html5_, latex__, manpage_, + odt_, pep_html_, pseudo-xml_, s5_html_, xelatex_, xml_, +:Config_: See `[docutils application]`_ + +The generic front end allows combining "reader", "parser", and +"writer" components from the Docutils package or 3rd party plug-ins. + +Since Docutils 0.19, it can be called by Python's ``-m`` option, +the ``docutils`` script installed in the binary PATH, or the +``docutils-cli.py`` script in the ``tools/`` directory. + +For example, to process a Markdown_ file "``test.md``" into +Pseudo-XML_ :: + + python3 -m docutils --parser=markdown --writer=pseudoxml\ + test.md test.txt + +Use the "--help" option together with the component-selection options +to get the correct list of supported command-line options. Example:: + + docutils --parser=markdown --writer=xml --help + + + +__ +.. _latex2e: +.. _Generating LaTeX with Docutils: latex.html +.. _manpage: manpage.html +.. _Markdown: https://www.markdownguide.org/ +.. _[docutils application]: config.html#docutils-application + + +HTML-Generating Tools +===================== + +buildhtml.py +------------ + +:Readers: Standalone, PEP +:Parser: reStructuredText +:Writers: html_, html5_, pep_html_ +:Config_: `[buildhtml application]`_ + +Use ``buildhtml.py`` to generate ``*.html`` from all the ``*.txt`` files +(including PEPs) in each <directory> given, and their subdirectories +too. (Use the ``--local`` option to skip subdirectories.) + +Usage:: + + buildhtml.py [options] [<directory> ...] + +After unpacking the Docutils package, the following shell commands +will generate HTML for all included documentation:: + + cd docutils/tools + buildhtml.py .. 
+ +For official releases, the directory may be called "docutils-X.Y", +where "X.Y" is the release version. Alternatively:: + + cd docutils + tools/buildhtml.py --config=tools/docutils.conf + +The current directory (and all subdirectories) is chosen by default if +no directory is named. Some files may generate system messages +(docs/user/rst/demo.txt contains intentional errors); use the +``--quiet`` option to suppress all warnings. The ``--config`` option +ensures that the correct settings are in place (a ``docutils.conf`` +`configuration file`_ in the current directory is picked up +automatically). Command-line options may be used to override config +file settings or replace them altogether. + +.. _[buildhtml application]: config.html#buildhtml-application +.. _configuration file: `configuration files`_ + + +rst2html.py +----------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: html_ + +`rst2html.py` is the front-end for the default Docutils HTML writer. +The default writer may change with the development of HTML, browsers, +Docutils, and the web. Currently, it is html4css1_. + +.. caution:: + Use a specific front end like rst2html4.py_ or rst2html5.py_, + if you depend on stability of the generated HTML code + (e.g., because you use a custom style sheet or post-processing + that may break otherwise). + + +rst2html4.py +------------ + +:Reader: Standalone +:Parser: reStructuredText +:Writer: html4css1_ + +The ``rst2html4.py`` front end reads standalone reStructuredText source +files and produces `XHTML 1.0 Transitional`_ output. +A CSS stylesheet is required for proper rendering; a simple but +complete stylesheet is installed and used by default (see Stylesheets_ +below). + +For example, to process a reStructuredText file "``test.txt``" into +HTML:: + + rst2html.py test.txt test.html + +Now open the "``test.html``" file in your favorite browser to see the +results. 
To get a footer with a link to the source file, date & time +of processing, and links to the Docutils project, add some options:: + + rst2html.py -stg test.txt test.html + + +Stylesheets +``````````` + +``rst2html.py`` inserts into the generated HTML a cascading stylesheet +(or a link to a stylesheet, when passing the "``--link-stylesheet``" +option). A stylesheet is required for proper rendering. The default +stylesheet (``docutils/writers/html4css1/html4css1.css``, located in +the installation directory) is provided for basic use. To use +different stylesheet(s), specify the stylesheets' location(s) +as a comma-separated list with the "``--stylesheet``" (for a URL) +or "``--stylesheet-path``" (for a local file) command-line option, +or with `configuration file`_ settings (e.g. ``./docutils.conf`` +or ``~/.docutils``). To experiment with styles, please see the +`guide to writing HTML (CSS) stylesheets for Docutils`__. + +__ ../howto/html-stylesheets.html +.. _html4css1: html.html#html4css1 +.. _html: html.html#html + + +rst2html5.py +------------ + +:Reader: Standalone +:Parser: reStructuredText +:Writer: html5_ + +The ``rst2html5.py`` front end reads standalone reStructuredText source +files and produces `HTML 5`_ output. +Correct rendering of elements not directly supported by HTML depends on a +CSS style sheet. The provided style sheets ``minimal.css`` and ``plain.css`` +define required and optional styling rules respectively. + +.. _html5: html.html#html5-polyglot + +rstpep2html.py +-------------- + +:Reader: PEP +:Parser: reStructuredText +:Writer: pep_html_ + +``rstpep2html.py`` reads a new-style PEP (marked up with reStructuredText) +and produces `XHTML 1.0 Transitional`_. It requires a template file and a +stylesheet. By default, it makes use of a "``pep-html-template``" file and +the "``pep.css``" stylesheet (both in the ``docutils/writers/pep_html/`` +directory), but these can be overridden by command-line options or +configuration files.
+ +For example, to process a PEP into HTML:: + + cd <path-to-docutils>/docs/peps + rstpep2html.py pep-0287.txt pep-0287.html + +.. _pep_html: html.html#pep-html + +rst2s5.py +--------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: s5_html_ + +The ``rst2s5.py`` front end reads standalone reStructuredText source +files and produces (X)HTML output compatible with S5_, the "Simple +Standards-based Slide Show System" by Eric Meyer. A theme is required +for proper rendering; several are distributed with Docutils and others +are available; see Themes_ below. + +For example, to process a reStructuredText file "``slides.txt``" into +S5/HTML:: + + rst2s5.py slides.txt slides.html + +Now open the "``slides.html``" file in your favorite browser, switch +to full-screen mode, and enjoy the results. + +.. _S5: http://meyerweb.com/eric/tools/s5/ +.. _s5_html: html.html#s5-html + +Themes +`````` + +Each S5 theme consists of a directory containing several files: +stylesheets, JavaScript, and graphics. These are copied into a +``ui/<theme>`` directory beside the generated HTML. A theme is chosen +using the "``--theme``" option (for themes that come with Docutils) or +the "``--theme-url``" option (for themes anywhere). For example, the +"medium-black" theme can be specified as follows:: + + rst2s5.py --theme medium-black slides.txt slides.html + +The theme will be copied to the ``ui/medium-black`` directory. + +Several themes are included with Docutils: + +``default`` + This is a simplified version of S5's default theme. + + :Main content: black serif text on a white background + :Text capacity: about 13 lines + :Headers: light blue, bold sans-serif text on a dark blue + background; titles are limited to one line + :Footers: small, gray, bold sans-serif text on a dark blue + background + +``small-white`` + (Small text on a white background.) 
+ + :Main content: black serif text on a white background + :Text capacity: about 15 lines + :Headers: black, bold sans-serif text on a white background; + titles wrap + :Footers: small, dark gray, bold sans-serif text on a white + background + +``small-black`` + :Main content: white serif text on a black background + :Text capacity: about 15 lines + :Headers: white, bold sans-serif text on a black background; + titles wrap + :Footers: small, light gray, bold sans-serif text on a black + background + +``medium-white`` + :Main content: black serif text on a white background + :Text capacity: about 9 lines + :Headers: black, bold sans-serif text on a white background; + titles wrap + :Footers: small, dark gray, bold sans-serif text on a white + background + +``medium-black`` + :Main content: white serif text on a black background + :Text capacity: about 9 lines + :Headers: white, bold sans-serif text on a black background; + titles wrap + :Footers: small, light gray, bold sans-serif text on a black + background + +``big-white`` + :Main content: black, bold sans-serif text on a white background + :Text capacity: about 5 lines + :Headers: black, bold sans-serif text on a white background; + titles wrap + :Footers: not displayed + +``big-black`` + :Main content: white, bold sans-serif text on a black background + :Text capacity: about 5 lines + :Headers: white, bold sans-serif text on a black background; + titles wrap + :Footers: not displayed + +If a theme directory contains a file named ``__base__``, the name of +the theme's base theme will be read from it. Files are accumulated +from the named theme, any base themes, and the "default" theme (which +is the implicit base of all themes). + +For details, please see `Easy Slide Shows With reStructuredText & +S5 <slide-shows.html>`_. + + +.. _HTML 5: https://www.w3.org/TR/html5/ +.. _HTML 4.1: https://www.w3.org/TR/html401/ +.. _XHTML 1.0 Transitional: https://www.w3.org/TR/xhtml1/ +.. 
_XHTML 1.1: https://www.w3.org/TR/xhtml1/ + + +LaTeX-Generating Tools +====================== + +rst2latex.py +------------ + +:Reader: Standalone +:Parser: reStructuredText +:Writer: latex2e_ + +The ``rst2latex.py`` front end reads standalone reStructuredText +source files and produces LaTeX_ output. For example, to process a +reStructuredText file "``test.txt``" into LaTeX:: + + rst2latex.py test.txt test.tex + +The output file "``test.tex``" should then be processed with ``latex`` +or ``pdflatex`` to get a document in DVI, PostScript or PDF format for +printing or on-screen viewing. + +For details see `Generating LaTeX with Docutils`_. + +rst2xetex.py +------------ + +:Reader: Standalone +:Parser: reStructuredText +:Writer: _`xelatex` + +The ``rst2xetex.py`` front end reads standalone reStructuredText source +files and produces `LaTeX` output for processing with Unicode-aware +TeX engines (`LuaTeX`_ or `XeTeX`_). For example, to process a +reStructuredText file "``test.txt``" into LaTeX:: + + rst2xetex.py test.txt test.tex + +The output file "``test.tex``" should then be processed with ``xelatex`` or +``lualatex`` to get a document in PDF format for printing or on-screen +viewing. + +For details see `Generating LaTeX with Docutils`_. + +.. _LaTeX: https://en.wikipedia.org/wiki/LaTeX +.. _XeTeX: https://en.wikipedia.org/wiki/XeTeX +.. _LuaTeX: https://en.wikipedia.org/wiki/LuaTeX + + +Man-Page-Generating Tools +========================= + +rst2man.py +---------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: manpage_ + +The ``rst2man.py`` front end reads standalone reStructuredText source +files and produces troff_ sources for Unix man pages. + +.. 
_troff: https://troff.org/ + + +ODF/OpenOffice-Generating Tools +=============================== + +rst2odt.py +---------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: odt_ + +The ``rst2odt.py`` front end reads standalone reStructuredText +source files and produces ODF/.odt files that can be read, edited, +printed, etc. with OpenOffice_ ``oowriter`` or LibreOffice_ ``lowriter``. +A stylesheet file is required. A +stylesheet file is an OpenOffice .odt file containing definitions +of the styles required for ``rst2odt.py``. +For details, see `Odt Writer for Docutils`_. + +.. _OpenOffice: https://www.openoffice.org/ +.. _LibreOffice: https://www.libreoffice.org/ +.. _odt: +.. _Odt Writer for Docutils: odt.html + +rst2odt_prepstyles.py +````````````````````` + +A helper tool to fix a word-processor-generated STYLE_FILE.odt for +odtwriter use:: + + rst2odt_prepstyles STYLE_FILE.odt + +See `Odt Writer for Docutils`__ for details. + +__ odt.html#page-size + + +reStructuredText-Generating Tools +================================= + +Currently, there is no reStructuredText writer in Docutils and therefore +an ``rst2rst.py`` tool is still missing. + +To generate reStructuredText documents with Docutils, you can use +the XML (Docutils native) writer and the xml2rst_ processor. + + +XML-Generating Tools +==================== + +rst2xml.py +---------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: _`XML` (Docutils native) + +The ``rst2xml.py`` front end produces Docutils-native XML output. +This can be transformed with standard XML tools such as XSLT +processors into arbitrary final forms. An example is the xml2rst_ processor +in the Docutils sandbox. + +..
_xml2rst: ../../../sandbox/xml2rst + + +Testing/Debugging Tools +======================= + +rst2pseudoxml.py +---------------- + +:Reader: Standalone +:Parser: reStructuredText +:Writer: _`Pseudo-XML` + +``rst2pseudoxml.py`` is used for debugging the Docutils "Reader to +Transform to Writer" pipeline. It produces a compact pretty-printed +"pseudo-XML", where nesting is indicated by indentation (no end-tags). +External attributes for all elements are output, and internal +attributes for any leftover "pending" elements are also given. + + +quicktest.py +------------ + +:Reader: N/A +:Parser: reStructuredText +:Writer: N/A + +The ``quicktest.py`` tool is used for testing the reStructuredText +parser. It does not use a Docutils Reader or Writer or the standard +Docutils command-line options. Rather, it does its own I/O and calls +the parser directly. No transforms are applied to the parsed +document. Possible output forms include: + +--pretty Pretty-printed pseudo-XML (default) + +--test Test data (Python list of input and pseudo-XML output strings; + useful for creating new test cases) +--xml Pretty-printed native XML +--rawxml Raw native XML (with or without a stylesheet reference) +--help Usage hint and complete list of supported options. + + +--------------- + Customization +--------------- + +Most front-end tools support the options/settings from the generic +`configuration file sections`_ plus the sections of their components +(reader, writer, parser). [#]_ +Some front-end tools also add application-specific settings. + +.. [#] The exceptions are quicktest.py_ and rst2odt_prepstyles.py_. + + +Command-Line Options +==================== + +Command-line options are intended for one-off customization. +They take priority over configuration file settings. + +Use the "--help" option on each of the front ends to list the +command-line options it supports.
+ + +Configuration Files +=================== + +Configuration files are used for persistent customization; they can be +set once and take effect every time you use a front-end tool. + +Command-line options and their corresponding configuration file entry +names are listed in the `Docutils Configuration`_ document. + +.. _Docutils Configuration: config.html +.. _Config: +.. _configuration file sections: + config.html#configuration-file-sections-entries + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/pyperformance/data-files/benchmarks/bm_docutils/pyproject.toml b/pyperformance/data-files/benchmarks/bm_docutils/pyproject.toml new file mode 100644 index 00000000..005a914d --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "pyperformance_bm_docutils" +requires-python = ">=3.8" +dependencies = [ + "pyperf", + "docutils", +] +urls.repository = "https://github.com/python/pyperformance" +dynamic = ["version"] + +[tool.pyperformance] +name = "docutils" +tags = "apps" diff --git a/pyperformance/data-files/benchmarks/bm_docutils/requirements.txt b/pyperformance/data-files/benchmarks/bm_docutils/requirements.txt new file mode 100644 index 00000000..1ffc5119 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/requirements.txt @@ -0,0 +1 @@ +docutils==0.18.1 diff --git a/pyperformance/data-files/benchmarks/bm_docutils/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_docutils/run_benchmark.py new file mode 100644 index 00000000..1b07235a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_docutils/run_benchmark.py @@ -0,0 +1,55 @@ +""" +Convert Docutils' documentation from reStructuredText to HTML.
+""" + +import contextlib +from pathlib import Path +import time + +import docutils +from docutils import core +import pyperf + +try: + from docutils.utils.math.math2html import Trace +except ImportError: + pass +else: + Trace.show = lambda message, channel: ... # don't print to console + +DOC_ROOT = (Path(__file__).parent / "data" / "docs").resolve() + + +def build_html(doc_root): + elapsed = 0 + for file in doc_root.rglob("*.txt"): + file_contents = file.read_text(encoding="utf-8") + t0 = pyperf.perf_counter() + with contextlib.suppress(docutils.ApplicationError): + core.publish_string(source=file_contents, + reader_name="standalone", + parser_name="restructuredtext", + writer_name="html5", + settings_overrides={ + "input_encoding": "unicode", + "output_encoding": "unicode", + "report_level": 5, + }) + elapsed += pyperf.perf_counter() - t0 + return elapsed + + +def bench_docutils(loops, doc_root): + runs_total = 0 + for _ in range(loops): + runs_total += build_html(doc_root) + return runs_total + + +if __name__ == "__main__": + runner = pyperf.Runner() + + runner.metadata['description'] = "Render documentation with Docutils" + args = runner.parse_args() + + runner.bench_time_func("docutils", bench_docutils, DOC_ROOT)