-
-
Notifications
You must be signed in to change notification settings - Fork 643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for TOML config files (pants.toml
)
#9052
Changes from all commits
008bacc
052cd22
7306439
d0db31b
43cd952
fb49453
cfe295f
5b4044d
e87f156
1f5d8e9
5469e27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,12 +6,15 @@ | |
import io | ||
import itertools | ||
import os | ||
import re | ||
from abc import ABC, abstractmethod | ||
from contextlib import contextmanager | ||
from dataclasses import dataclass | ||
from hashlib import sha1 | ||
from pathlib import PurePath | ||
from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast | ||
|
||
import toml | ||
from twitter.common.collections import OrderedSet | ||
from typing_extensions import Literal | ||
|
||
|
@@ -90,11 +93,19 @@ def _meta_load( | |
content_digest = sha1(content).hexdigest() | ||
normalized_seed_values = cls._determine_seed_values(seed_values=seed_values) | ||
|
||
ini_parser = configparser.ConfigParser(defaults=normalized_seed_values) | ||
ini_parser.read_string(content.decode()) | ||
config_values: _ConfigValues | ||
if PurePath(config_path).suffix == ".toml": | ||
toml_values = cast(Dict[str, Any], toml.loads(content.decode())) | ||
toml_values["DEFAULT"] = {**normalized_seed_values, **toml_values.get("DEFAULT", {})} | ||
config_values = _TomlValues(toml_values) | ||
else: | ||
ini_parser = configparser.ConfigParser(defaults=normalized_seed_values) | ||
ini_parser.read_string(content.decode()) | ||
config_values = _IniValues(ini_parser) | ||
|
||
single_file_configs.append( | ||
_SingleFileConfig( | ||
config_path=config_path, content_digest=content_digest, values=_IniValues(ini_parser), | ||
config_path=config_path, content_digest=content_digest, values=config_values, | ||
), | ||
) | ||
return _ChainedConfig(tuple(reversed(single_file_configs))) | ||
|
@@ -182,10 +193,11 @@ def get_source_for_option(self, section: str, option: str) -> Optional[str]: | |
class _ConfigValues(ABC): | ||
"""Encapsulates resolving the actual config values specified by the user's config file. | ||
|
||
Beyond providing better encapsulation, this allows us to support alternative config file formats | ||
in the future if we ever decide to support formats other than INI. | ||
Due to encapsulation, this allows us to support both TOML and INI config files without any of | ||
the rest of the Pants codebase knowing whether the config came from TOML or INI. | ||
""" | ||
|
||
@property | ||
@abstractmethod | ||
def sections(self) -> List[str]: | ||
"""Returns the sections in this config (not including DEFAULT).""" | ||
|
@@ -206,15 +218,17 @@ def get_value(self, section: str, option: str) -> Optional[str]: | |
def options(self, section: str) -> List[str]: | ||
"""All options defined for the section.""" | ||
|
||
@property | ||
@abstractmethod | ||
def defaults(self) -> Mapping[str, Any]: | ||
def defaults(self) -> Mapping[str, str]: | ||
"""All the DEFAULT values (not interpolated).""" | ||
|
||
|
||
@dataclass(frozen=True) | ||
class _IniValues(_ConfigValues): | ||
parser: configparser.ConfigParser | ||
|
||
@property | ||
def sections(self) -> List[str]: | ||
return self.parser.sections() | ||
|
||
|
@@ -230,10 +244,196 @@ def get_value(self, section: str, option: str) -> Optional[str]: | |
def options(self, section: str) -> List[str]: | ||
return self.parser.options(section) | ||
|
||
@property | ||
def defaults(self) -> Mapping[str, str]: | ||
return self.parser.defaults() | ||
|
||
|
||
_TomlPrimitve = Union[bool, int, float, str] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't (yet?) account for TOML's native support of date times. We don't have any options that use that |
||
_TomlValue = Union[_TomlPrimitve, List[_TomlPrimitve]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could also technically be a |
||
|
||
|
||
@dataclass(frozen=True) | ||
class _TomlValues(_ConfigValues): | ||
values: Dict[str, Any] | ||
|
||
@staticmethod | ||
def _is_an_option(option_value: Union[_TomlValue, Dict]) -> bool: | ||
"""Determine if the value is actually an option belonging to that section. | ||
|
||
A value that looks like an option might actually be a subscope, e.g. the option value | ||
`java` belonging to the section `cache` could actually be the section `cache.java`, rather | ||
than the option `--cache-java`. | ||
|
||
We must also handle the special syntax of `my_list_option.add` and `my_list_option.remove`. | ||
""" | ||
if isinstance(option_value, dict): | ||
return "add" in option_value or "remove" in option_value | ||
return True | ||
|
||
@staticmethod | ||
def _section_explicitly_defined(section_values: Dict) -> bool: | ||
"""Determine if the section is truly a defined section, meaning that the user explicitly wrote | ||
the section in their config file. | ||
|
||
For example, the user may have explicitly defined `cache.java` but never defined `cache`. Due | ||
to TOML's representation of the config as a nested dictionary, naively, it would appear that | ||
`cache` was defined even though the user never explicitly added it to their config. | ||
""" | ||
at_least_one_option_defined = any( | ||
_TomlValues._is_an_option(section_value) for section_value in section_values.values() | ||
) | ||
# We also check if the section was explicitly defined but has no options. We can be confident | ||
# that this is not a parent scope (e.g. `cache` when `cache.java` is really what was defined) | ||
# because the parent scope would store its child scope in its values, so the values would not | ||
# be empty. | ||
blank_section = len(section_values.values()) == 0 | ||
Eric-Arellano marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return at_least_one_option_defined or blank_section | ||
|
||
def _find_section_values(self, section: str) -> Optional[Dict]: | ||
"""Find the values for a section, if any. | ||
|
||
For example, if the config file was `{'GLOBAL': {'foo': 1}}`, this function would return | ||
`{'foo': 1}` given `section='GLOBAL'`. | ||
""" | ||
def recurse(mapping: Dict, *, remaining_sections: List[str]) -> Optional[Dict]: | ||
if not remaining_sections: | ||
return None | ||
current_section = remaining_sections[0] | ||
if current_section not in mapping: | ||
return None | ||
section_values = mapping[current_section] | ||
if len(remaining_sections) > 1: | ||
return recurse(section_values, remaining_sections=remaining_sections[1:]) | ||
if not self._section_explicitly_defined(section_values): | ||
return None | ||
return cast(Dict, section_values) | ||
|
||
return recurse(mapping=self.values, remaining_sections=section.split(".")) | ||
|
||
def _possibly_interpolate_value(self, raw_value: str) -> str: | ||
"""For any values with %(foo)s, substitute it with the corresponding default val.""" | ||
def format_str(value: str) -> str: | ||
# Because dictionaries use the symbols `{}`, we must proactively escape the symbols so that | ||
# .format() does not try to improperly interpolate. | ||
escaped_str = value.replace("{", "{{").replace("}", "}}") | ||
new_style_format_str = re.sub( | ||
pattern=r"%\((?P<interpolated>[a-zA-Z_0-9]*)\)s", | ||
repl=r"{\g<interpolated>}", | ||
Comment on lines
+321
to
+322
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generally, it's best practice to use Lmk if I should use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If python3 automatically caches regexes, and these regexes are only used once on options parsing and then not needed for the rest of the run, then it's probably best not to |
||
string=escaped_str, | ||
) | ||
return new_style_format_str.format(**self.defaults) | ||
|
||
def recursively_format_str(value: str) -> str: | ||
# It's possible to interpolate with a value that itself has an interpolation. We must fully | ||
# evaluate all expressions for parity with configparser. | ||
if not re.search(r"%\([a-zA-Z_0-9]*\)s", value): | ||
return value | ||
return recursively_format_str(value=format_str(value)) | ||
|
||
return recursively_format_str(raw_value) | ||
|
||
def _stringify_val( | ||
self, raw_value: _TomlValue, *, interpolate: bool = True, list_prefix: Optional[str] = None, | ||
) -> str: | ||
"""For parity with configparser, we convert all values back to strings, which allows us to | ||
avoid upstream changes to files like parser.py. | ||
|
||
This is clunky. If we drop INI support, we should remove this and use native values.""" | ||
Comment on lines
+336
to
+342
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Originally, I did not take this approach and preserved the parsed data types. But, I realized for the initial prototype that this complicates things too much because it means we have a leaky API. For example, it would be really hard to preserve the If we end up removing INI, we could remove this all. |
||
if isinstance(raw_value, str): | ||
return self._possibly_interpolate_value(raw_value) if interpolate else raw_value | ||
|
||
if isinstance(raw_value, list): | ||
def stringify_list_member(member: _TomlPrimitve) -> str: | ||
if not isinstance(member, str): | ||
return str(member) | ||
interpolated_member = self._possibly_interpolate_value(member) if interpolate else member | ||
return f'"{interpolated_member}"' | ||
|
||
list_members = ", ".join(stringify_list_member(member) for member in raw_value) | ||
return f"{list_prefix or ''}[{list_members}]" | ||
|
||
return str(raw_value) | ||
|
||
@property | ||
def sections(self) -> List[str]: | ||
sections: List[str] = [] | ||
|
||
def recurse(mapping: Dict, *, parent_section: Optional[str] = None) -> None: | ||
for section, section_values in mapping.items(): | ||
if not isinstance(section_values, dict): | ||
continue | ||
# We filter out "DEFAULT" and also check for the special `my_list_option.add` and | ||
# `my_list_option.remove` syntax. | ||
if section == "DEFAULT" or "add" in section_values or "remove" in section_values: | ||
continue | ||
section_name = section if not parent_section else f"{parent_section}.{section}" | ||
if self._section_explicitly_defined(section_values): | ||
sections.append(section_name) | ||
recurse(section_values, parent_section=section_name) | ||
|
||
recurse(self.values) | ||
return sections | ||
|
||
def has_section(self, section: str) -> bool: | ||
return self._find_section_values(section) is not None | ||
|
||
def has_option(self, section: str, option: str) -> bool: | ||
try: | ||
self.get_value(section, option) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically, |
||
except (configparser.NoSectionError, configparser.NoOptionError): | ||
return False | ||
else: | ||
return True | ||
|
||
def get_value(self, section: str, option: str) -> Optional[str]: | ||
section_values = self._find_section_values(section) | ||
if section_values is None: | ||
raise configparser.NoSectionError(section) | ||
if option in self.defaults: | ||
return self._stringify_val(self.defaults[option]) | ||
if option not in section_values: | ||
raise configparser.NoOptionError(option, section) | ||
option_value = section_values[option] | ||
# Handle the special `my_list_option.add` and `my_list_option.remove` syntax. | ||
if isinstance(option_value, dict): | ||
has_add = "add" in option_value | ||
has_remove = "remove" in option_value | ||
if not has_add and not has_remove: | ||
raise configparser.NoOptionError(option, section) | ||
add_val = ( | ||
self._stringify_val(option_value["add"], list_prefix="+") if has_add else None | ||
) | ||
remove_val = ( | ||
self._stringify_val(option_value["remove"], list_prefix="-") if has_remove else None | ||
) | ||
if has_add and has_remove: | ||
return f"{add_val},{remove_val}" | ||
if has_add: | ||
return add_val | ||
return remove_val | ||
return self._stringify_val(option_value) | ||
|
||
def options(self, section: str) -> List[str]: | ||
section_values = self._find_section_values(section) | ||
if section_values is None: | ||
raise configparser.NoSectionError(section) | ||
options = [ | ||
option | ||
for option, option_value in section_values.items() | ||
if self._is_an_option(option_value) | ||
] | ||
options.extend(self.defaults.keys()) | ||
return options | ||
|
||
@property | ||
def defaults(self) -> Mapping[str, str]: | ||
return { | ||
option: self._stringify_val(option_val, interpolate=False) | ||
for option, option_val in self.values["DEFAULT"].items() | ||
} | ||
|
||
|
||
@dataclass(frozen=True) | ||
class _EmptyConfig(Config): | ||
"""A dummy config with no data at all.""" | ||
|
@@ -274,7 +474,7 @@ def sources(self) -> List[str]: | |
return [self.config_path] | ||
|
||
def sections(self) -> List[str]: | ||
return self.values.sections() | ||
return self.values.sections | ||
|
||
def has_section(self, section: str) -> bool: | ||
return self.values.has_section(section) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Futureproofing that actually worked! Wooho!