Skip to content

Commit

Permalink
Initial support for reading mapping configuration as TOML (#1108)
Browse files Browse the repository at this point in the history
* Rename parse_mapping to parse_mapping_cfg and remove duplicated test
* Add initial support for TOML mapping configuration (prefer tomllib to tomli)

---------

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Tomas R <tomas.roun8@gmail.com>
  • Loading branch information
3 people authored Aug 7, 2024
1 parent 34ed517 commit d26a669
Show file tree
Hide file tree
Showing 15 changed files with 274 additions and 66 deletions.
168 changes: 121 additions & 47 deletions babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
import shutil
import sys
import tempfile
import warnings
from collections import OrderedDict
from configparser import RawConfigParser
from io import StringIO
from typing import Iterable
from typing import BinaryIO, Iterable, Literal

from babel import Locale, localedata
from babel import __version__ as VERSION
Expand Down Expand Up @@ -53,6 +54,12 @@ class SetupError(BaseError):
pass


class ConfigurationError(BaseError):
    """Signals a problem found in a configuration file."""


def listify_value(arg, split=None):
"""
Make a list out of an argument.
Expand Down Expand Up @@ -534,16 +541,29 @@ def _get_mappings(self):
mappings = []

if self.mapping_file:
with open(self.mapping_file) as fileobj:
method_map, options_map = parse_mapping(fileobj)
if self.mapping_file.endswith(".toml"):
with open(self.mapping_file, "rb") as fileobj:
file_style = (
"pyproject.toml"
if os.path.basename(self.mapping_file) == "pyproject.toml"
else "standalone"
)
method_map, options_map = _parse_mapping_toml(
fileobj,
filename=self.mapping_file,
style=file_style,
)
else:
with open(self.mapping_file) as fileobj:
method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file)
for path in self.input_paths:
mappings.append((path, method_map, options_map))

elif getattr(self.distribution, 'message_extractors', None):
message_extractors = self.distribution.message_extractors
for path, mapping in message_extractors.items():
if isinstance(mapping, str):
method_map, options_map = parse_mapping(StringIO(mapping))
method_map, options_map = parse_mapping_cfg(StringIO(mapping))
else:
method_map, options_map = [], {}
for pattern, method, options in mapping:
Expand Down Expand Up @@ -980,53 +1000,19 @@ def main():


def parse_mapping(fileobj, filename=None):
    """Deprecated alias of `parse_mapping_cfg`.

    Issues a `DeprecationWarning` and then delegates to `parse_mapping_cfg`
    with the same arguments, returning its result unchanged.

    :param fileobj: passed through to `parse_mapping_cfg`
    :param filename: passed through to `parse_mapping_cfg`
    """
    message = "parse_mapping is deprecated, use parse_mapping_cfg instead"
    # stacklevel=2 attributes the warning to the caller rather than to this shim.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return parse_mapping_cfg(fileobj, filename)

>>> buf = StringIO('''
... [extractors]
... custom = mypackage.module:myfunc
...
... # Python source files
... [python: **.py]
...
... # Genshi templates
... [genshi: **/templates/**.html]
... include_attrs =
... [genshi: **/templates/**.txt]
... template_class = genshi.template:TextTemplate
... encoding = latin-1
...
... # Some custom extractor
... [custom: **/custom/*.*]
... ''')
>>> method_map, options_map = parse_mapping(buf)
>>> len(method_map)
4
>>> method_map[0]
('**.py', 'python')
>>> options_map['**.py']
{}
>>> method_map[1]
('**/templates/**.html', 'genshi')
>>> options_map['**/templates/**.html']['include_attrs']
''
>>> method_map[2]
('**/templates/**.txt', 'genshi')
>>> options_map['**/templates/**.txt']['template_class']
'genshi.template:TextTemplate'
>>> options_map['**/templates/**.txt']['encoding']
'latin-1'
>>> method_map[3]
('**/custom/*.*', 'mypackage.module:myfunc')
>>> options_map['**/custom/*.*']
{}

def parse_mapping_cfg(fileobj, filename=None):
"""Parse an extraction method mapping from a file-like object.
:param fileobj: a readable file-like object containing the configuration
text to parse
:see: `extract_from_directory`
"""
extractors = {}
method_map = []
Expand All @@ -1053,6 +1039,94 @@ def parse_mapping(fileobj, filename=None):
return method_map, options_map


def _parse_config_object(config: dict, *, filename="(unknown)"):
extractors = {}
method_map = []
options_map = {}

extractors_read = config.get("extractors", {})
if not isinstance(extractors_read, dict):
raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}")
for method, callable_spec in extractors_read.items():
if not isinstance(method, str):
# Impossible via TOML, but could happen with a custom object.
raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}")
if not isinstance(callable_spec, str):
raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}")
extractors[method] = callable_spec

if "mapping" in config:
raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?")

mappings_read = config.get("mappings", [])
if not isinstance(mappings_read, list):
raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}")
for idx, entry in enumerate(mappings_read):
if not isinstance(entry, dict):
raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}")
entry = entry.copy()

method = entry.pop("method", None)
if not isinstance(method, str):
raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}")
method = extractors.get(method, method) # Map the extractor name to the callable now

pattern = entry.pop("pattern", None)
if not isinstance(pattern, (list, str)):
raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got {pattern!r}")
if not isinstance(pattern, list):
pattern = [pattern]

for pat in pattern:
if not isinstance(pat, str):
raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}")
method_map.append((pat, method))
options_map[pat] = entry

return method_map, options_map


def _parse_mapping_toml(
    fileobj: BinaryIO,
    filename: str = "(unknown)",
    style: Literal["standalone", "pyproject.toml"] = "standalone",
):
    """Parse an extraction method mapping from a binary file-like object.

    .. warning:: As of this version of Babel, this is a private API subject to changes.

    :param fileobj: a readable binary file-like object containing the configuration TOML to parse
    :param filename: the name of the file being parsed, for error messages
    :param style: whether the file is in the style of a `pyproject.toml` file, i.e. whether to look for `tool.babel`.
    :return: the ``(method_map, options_map)`` tuple returned by `_parse_config_object`
    :raises ConfigurationError: if the TOML cannot be decoded, the expected
                                section is missing or is not a table, or a
                                stand-alone file contains a ``babel`` key
    :raises ImportError: if neither ``tomllib`` nor ``tomli`` can be imported
    :raises ValueError: if ``style`` is not one of the supported values
    """
    # Prefer the standard-library parser; fall back to the `tomli` package.
    try:
        import tomllib
    except ImportError:
        try:
            import tomli as tomllib
        except ImportError as ie:  # pragma: no cover
            raise ImportError("tomli or tomllib is required to parse TOML files") from ie

    try:
        parsed_data = tomllib.load(fileobj)
    except tomllib.TOMLDecodeError as e:
        raise ConfigurationError(f"{filename}: Error parsing TOML file: {e}") from e

    if style == "pyproject.toml":
        try:
            babel_data = parsed_data["tool"]["babel"]
        except (TypeError, KeyError) as e:
            # TypeError covers a `tool` value that cannot be indexed by key.
            raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e
        if not isinstance(babel_data, dict):
            # E.g. `babel = 1` under `[tool]`: present, but not a section.
            raise ConfigurationError(f"{filename}: 'tool.babel' must be a table, got {type(babel_data)!r}")
    elif style == "standalone":
        babel_data = parsed_data
        if "babel" in babel_data:
            raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file")
    else:  # pragma: no cover
        raise ValueError(f"Unknown TOML style {style!r}")

    return _parse_config_object(babel_data, filename=filename)


def _parse_spec(s: str) -> tuple[int | None, tuple[int | tuple[int, str], ...]]:
inds = []
number = None
Expand Down
101 changes: 82 additions & 19 deletions tests/messages/test_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
# history and logs, available at http://babel.edgewall.org/log/.
import logging
import os
import re
import shlex
import shutil
import sys
import time
import unittest
from datetime import datetime, timedelta
from functools import partial
from io import BytesIO, StringIO
from typing import List

Expand Down Expand Up @@ -1388,25 +1390,86 @@ def test_update_init_missing(self):
assert len(catalog) == 4 # Catalog was updated


def test_parse_mapping():
buf = StringIO(
'[extractors]\n'
'custom = mypackage.module:myfunc\n'
'\n'
'# Python source files\n'
'[python: **.py]\n'
'\n'
'# Genshi templates\n'
'[genshi: **/templates/**.html]\n'
'include_attrs =\n'
'[genshi: **/templates/**.txt]\n'
'template_class = genshi.template:TextTemplate\n'
'encoding = latin-1\n'
'\n'
'# Some custom extractor\n'
'[custom: **/custom/*.*]\n')

method_map, options_map = frontend.parse_mapping(buf)
# Mapping configuration fixture in the INI-like format; the parametrized
# test_parse_mapping feeds it to frontend.parse_mapping_cfg.
mapping_cfg = """
[extractors]
custom = mypackage.module:myfunc
# Python source files
[python: **.py]
# Genshi templates
[genshi: **/templates/**.html]
include_attrs =
[genshi: **/templates/**.txt]
template_class = genshi.template:TextTemplate
encoding = latin-1
# Some custom extractor
[custom: **/custom/*.*]
"""

# TOML counterpart of the mapping_cfg fixture; the parametrized
# test_parse_mapping feeds it to frontend._parse_mapping_toml, both as-is and
# with every table header prefixed with "tool.babel." for the pyproject.toml
# style.
mapping_toml = """
[extractors]
custom = "mypackage.module:myfunc"
# Python source files
[[mappings]]
method = "python"
pattern = "**.py"
# Genshi templates
[[mappings]]
method = "genshi"
pattern = "**/templates/**.html"
include_attrs = ""
[[mappings]]
method = "genshi"
pattern = "**/templates/**.txt"
template_class = "genshi.template:TextTemplate"
encoding = "latin-1"
# Some custom extractor
[[mappings]]
method = "custom"
pattern = "**/custom/*.*"
"""


@pytest.mark.parametrize(
("data", "parser", "preprocess", "is_toml"),
[
(
mapping_cfg,
frontend.parse_mapping_cfg,
None,
False,
),
(
mapping_toml,
frontend._parse_mapping_toml,
None,
True,
),
(
mapping_toml,
partial(frontend._parse_mapping_toml, style="pyproject.toml"),
lambda s: re.sub(r"^(\[+)", r"\1tool.babel.", s, flags=re.MULTILINE),
True,
),
],
ids=("cfg", "toml", "pyproject-toml"),
)
def test_parse_mapping(data: str, parser, preprocess, is_toml):
if preprocess:
data = preprocess(data)
if is_toml:
buf = BytesIO(data.encode())
else:
buf = StringIO(data)

method_map, options_map = parser(buf)
assert len(method_map) == 4

assert method_map[0] == ('**.py', 'python')
Expand Down
38 changes: 38 additions & 0 deletions tests/messages/test_toml_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pathlib
from io import BytesIO

import pytest

from babel.messages import frontend

# Directory next to this module that holds the TOML fixture files; fail at
# import time if it is missing.
toml_test_cases_path = pathlib.Path(__file__).parent.joinpath("toml-test-cases")
assert toml_test_cases_path.is_dir(), "toml-test-cases directory not found"


def test_toml_mapping_multiple_patterns():
    """
    A list-valued ``pattern`` in a TOML mapping must be expanded into one
    method map entry per listed pattern, in the order given.
    """
    toml_source = b"""
[[mappings]]
method = "python"
pattern = ["xyz/**.py", "foo/**.py"]
"""
    expected_entries = [('xyz/**.py', 'python'), ('foo/**.py', 'python')]

    method_map, options_map = frontend._parse_mapping_toml(BytesIO(toml_source))

    assert len(method_map) == 2
    for position, wanted in enumerate(expected_entries):
        assert method_map[position] == wanted


@pytest.mark.parametrize(
    "test_case",
    # glob() yields files in filesystem order; sort so that test collection
    # order is the same on every machine and in every worker process.
    sorted(toml_test_cases_path.glob("bad.*.toml")),
    ids=lambda p: p.name,
)
def test_bad_toml_test_case(test_case: pathlib.Path):
    """
    Test that bad TOML files raise a ConfigurationError.

    Files with "pyproject" in their name are parsed in the ``pyproject.toml``
    style, all others as stand-alone configuration files.
    """
    style = "pyproject.toml" if "pyproject" in test_case.name else "standalone"
    with pytest.raises(frontend.ConfigurationError):
        with test_case.open("rb") as f:
            frontend._parse_mapping_toml(
                f,
                filename=test_case.name,
                style=style,
            )
2 changes: 2 additions & 0 deletions tests/messages/toml-test-cases/bad.extractor.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[extractors]
custom = { module = "mypackage.module", func = "myfunc" }
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[[extractors]]
3 changes: 3 additions & 0 deletions tests/messages/toml-test-cases/bad.just-a-mapping.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[mapping]
method = "jinja2"
pattern = "**.html"
1 change: 1 addition & 0 deletions tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mappings = [8]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mappings = "python"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[[mappings]]
pattern = ["xyz/**.py", "foo/**.py"]
Loading

0 comments on commit d26a669

Please sign in to comment.