Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for TOML config files (pants.toml) #9052

Merged
merged 11 commits into from
Feb 7, 2020
1 change: 1 addition & 0 deletions 3rdparty/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ requests[security]>=2.20.1
responses==0.10.4
setproctitle==1.1.10
setuptools==40.6.3
toml==0.10.0
typing-extensions==3.7.4
wheel==0.31.1
www-authenticate==0.9.2
6 changes: 5 additions & 1 deletion src/python/pants/base/build_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import logging
import os
from pathlib import Path
from typing import Optional

from pants.base.build_root import BuildRoot
Expand Down Expand Up @@ -51,7 +52,10 @@ def get_pants_configdir() -> str:


def get_default_pants_config_file() -> str:
"""Return the default location of the pants config file."""
"""Return the default location of the Pants config file."""
default_toml = Path(get_buildroot(), "pants.toml")
if default_toml.is_file():
return str(default_toml)
return os.path.join(get_buildroot(), 'pants.ini')


Expand Down
1 change: 1 addition & 0 deletions src/python/pants/option/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ python_library(
'3rdparty/python:python-Levenshtein',
'3rdparty/python:PyYAML',
'3rdparty/python:setuptools',
'3rdparty/python:toml',
'3rdparty/python:typing-extensions',
'3rdparty/python/twitter/commons:twitter.common.collections',
'src/python/pants/base:build_environment',
Expand Down
214 changes: 207 additions & 7 deletions src/python/pants/option/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
import io
import itertools
import os
import re
from abc import ABC, abstractmethod
from contextlib import contextmanager
from dataclasses import dataclass
from hashlib import sha1
from pathlib import PurePath
from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence, Tuple, Union, cast

import toml
from twitter.common.collections import OrderedSet
from typing_extensions import Literal

Expand Down Expand Up @@ -90,11 +93,19 @@ def _meta_load(
content_digest = sha1(content).hexdigest()
normalized_seed_values = cls._determine_seed_values(seed_values=seed_values)

ini_parser = configparser.ConfigParser(defaults=normalized_seed_values)
ini_parser.read_string(content.decode())
config_values: _ConfigValues
if PurePath(config_path).suffix == ".toml":
toml_values = cast(Dict[str, Any], toml.loads(content.decode()))
toml_values["DEFAULT"] = {**normalized_seed_values, **toml_values.get("DEFAULT", {})}
config_values = _TomlValues(toml_values)
else:
ini_parser = configparser.ConfigParser(defaults=normalized_seed_values)
ini_parser.read_string(content.decode())
config_values = _IniValues(ini_parser)

single_file_configs.append(
_SingleFileConfig(
config_path=config_path, content_digest=content_digest, values=_IniValues(ini_parser),
config_path=config_path, content_digest=content_digest, values=config_values,
),
)
return _ChainedConfig(tuple(reversed(single_file_configs)))
Expand Down Expand Up @@ -182,10 +193,11 @@ def get_source_for_option(self, section: str, option: str) -> Optional[str]:
class _ConfigValues(ABC):
"""Encapsulates resolving the actual config values specified by the user's config file.

Beyond providing better encapsulation, this allows us to support alternative config file formats
in the future if we ever decide to support formats other than INI.
Due to encapsulation, this allows us to support both TOML and INI config files without any of
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Futureproofing that actually worked! Wooho!

the rest of the Pants codebase knowing whether the config came from TOML or INI.
"""

@property
@abstractmethod
def sections(self) -> List[str]:
"""Returns the sections in this config (not including DEFAULT)."""
Expand All @@ -206,15 +218,17 @@ def get_value(self, section: str, option: str) -> Optional[str]:
def options(self, section: str) -> List[str]:
"""All options defined for the section."""

@property
@abstractmethod
def defaults(self) -> Mapping[str, Any]:
def defaults(self) -> Mapping[str, str]:
"""All the DEFAULT values (not interpolated)."""


@dataclass(frozen=True)
class _IniValues(_ConfigValues):
parser: configparser.ConfigParser

@property
def sections(self) -> List[str]:
return self.parser.sections()

Expand All @@ -230,10 +244,196 @@ def get_value(self, section: str, option: str) -> Optional[str]:
def options(self, section: str) -> List[str]:
return self.parser.options(section)

@property
def defaults(self) -> Mapping[str, str]:
return self.parser.defaults()


_TomlPrimitve = Union[bool, int, float, str]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't (yet?) account for TOML's native support of date times. We don't have any options that use that type so I left it off.

_TomlValue = Union[_TomlPrimitve, List[_TomlPrimitve]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could also technically be a Dict to represent native Tables, but I left this off because we restrict Tables to solely being used for distinct option scopes/sections, rather than also for dict values. See the PR description.



@dataclass(frozen=True)
class _TomlValues(_ConfigValues):
values: Dict[str, Any]

@staticmethod
def _is_an_option(option_value: Union[_TomlValue, Dict]) -> bool:
"""Determine if the value is actually an option belonging to that section.

A value that looks like an option might actually be a subscope, e.g. the option value
`java` belonging to the section `cache` could actually be the section `cache.java`, rather
than the option `--cache-java`.

We must also handle the special syntax of `my_list_option.add` and `my_list_option.remove`.
"""
if isinstance(option_value, dict):
return "add" in option_value or "remove" in option_value
return True

@staticmethod
def _section_explicitly_defined(section_values: Dict) -> bool:
"""Determine if the section is truly a defined section, meaning that the user explicitly wrote
the section in their config file.

For example, the user may have explicitly defined `cache.java` but never defined `cache`. Due
to TOML's representation of the config as a nested dictionary, naively, it would appear that
`cache` was defined even though the user never explicitly added it to their config.
"""
at_least_one_option_defined = any(
_TomlValues._is_an_option(section_value) for section_value in section_values.values()
)
# We also check if the section was explicitly defined but has no options. We can be confident
# that this is not a parent scope (e.g. `cache` when `cache.java` is really what was defined)
# because the parent scope would store its child scope in its values, so the values would not
# be empty.
blank_section = len(section_values.values()) == 0
Eric-Arellano marked this conversation as resolved.
Show resolved Hide resolved
return at_least_one_option_defined or blank_section

def _find_section_values(self, section: str) -> Optional[Dict]:
"""Find the values for a section, if any.

For example, if the config file was `{'GLOBAL': {'foo': 1}}`, this function would return
`{'foo': 1}` given `section='GLOBAL'`.
"""
def recurse(mapping: Dict, *, remaining_sections: List[str]) -> Optional[Dict]:
if not remaining_sections:
return None
current_section = remaining_sections[0]
if current_section not in mapping:
return None
section_values = mapping[current_section]
if len(remaining_sections) > 1:
return recurse(section_values, remaining_sections=remaining_sections[1:])
if not self._section_explicitly_defined(section_values):
return None
return cast(Dict, section_values)

return recurse(mapping=self.values, remaining_sections=section.split("."))

def _possibly_interpolate_value(self, raw_value: str) -> str:
"""For any values with %(foo)s, substitute it with the corresponding default val."""
def format_str(value: str) -> str:
# Because dictionaries use the symbols `{}`, we must proactively escape the symbols so that
# .format() does not try to improperly interpolate.
escaped_str = value.replace("{", "{{").replace("}", "}}")
new_style_format_str = re.sub(
pattern=r"%\((?P<interpolated>[a-zA-Z_0-9]*)\)s",
repl=r"{\g<interpolated>}",
Comment on lines +321 to +322
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally, it's best practice to use re.compile. But it looks like that's not necessary in modern Python due to automatic caching? https://docs.python.org/3/library/re.html#re.compile

Lmk if I should use re.compile and any tips for where to put that - I'm not sure if it needs to be declared as a module constant, rather than a variable belonging to the method.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If python3 automatically caches regexes, and these regexes are only used once on options parsing and then not needed for the rest of the run, then it's probably best not to re.compile at the module level, so I think this is good as-is.

string=escaped_str,
)
return new_style_format_str.format(**self.defaults)

def recursively_format_str(value: str) -> str:
# It's possible to interpolate with a value that itself has an interpolation. We must fully
# evaluate all expressions for parity with configparser.
if not re.search(r"%\([a-zA-Z_0-9]*\)s", value):
return value
return recursively_format_str(value=format_str(value))

return recursively_format_str(raw_value)

def _stringify_val(
self, raw_value: _TomlValue, *, interpolate: bool = True, list_prefix: Optional[str] = None,
) -> str:
"""For parity with configparser, we convert all values back to strings, which allows us to
avoid upstream changes to files like parser.py.

This is clunky. If we drop INI support, we should remove this and use native values."""
Comment on lines +336 to +342
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally, I did not take this approach and preserved the parsed data types. But, I realized for the initial prototype that this complicates things too much because it means we have a leaky API. For example, it would be really hard to preserve the my_list_option.append and my_list_option.filter information.

If we end up removing INI, we could remove this all.

if isinstance(raw_value, str):
return self._possibly_interpolate_value(raw_value) if interpolate else raw_value

if isinstance(raw_value, list):
def stringify_list_member(member: _TomlPrimitve) -> str:
if not isinstance(member, str):
return str(member)
interpolated_member = self._possibly_interpolate_value(member) if interpolate else member
return f'"{interpolated_member}"'

list_members = ", ".join(stringify_list_member(member) for member in raw_value)
return f"{list_prefix or ''}[{list_members}]"

return str(raw_value)

@property
def sections(self) -> List[str]:
sections: List[str] = []

def recurse(mapping: Dict, *, parent_section: Optional[str] = None) -> None:
for section, section_values in mapping.items():
if not isinstance(section_values, dict):
continue
# We filter out "DEFAULT" and also check for the special `my_list_option.add` and
# `my_list_option.remove` syntax.
if section == "DEFAULT" or "add" in section_values or "remove" in section_values:
continue
section_name = section if not parent_section else f"{parent_section}.{section}"
if self._section_explicitly_defined(section_values):
sections.append(section_name)
recurse(section_values, parent_section=section_name)

recurse(self.values)
return sections

def has_section(self, section: str) -> bool:
return self._find_section_values(section) is not None

def has_option(self, section: str, option: str) -> bool:
try:
self.get_value(section, option)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, has_option is doing more work than necessary by relying on the get_value() implementation, e.g. it will interpolate values and _stringify them. But, I think that's fine to make this method much simpler. Otherwise, it has extremely high duplication of get_value().

except (configparser.NoSectionError, configparser.NoOptionError):
return False
else:
return True

def get_value(self, section: str, option: str) -> Optional[str]:
section_values = self._find_section_values(section)
if section_values is None:
raise configparser.NoSectionError(section)
if option in self.defaults:
return self._stringify_val(self.defaults[option])
if option not in section_values:
raise configparser.NoOptionError(option, section)
option_value = section_values[option]
# Handle the special `my_list_option.add` and `my_list_option.remove` syntax.
if isinstance(option_value, dict):
has_add = "add" in option_value
has_remove = "remove" in option_value
if not has_add and not has_remove:
raise configparser.NoOptionError(option, section)
add_val = (
self._stringify_val(option_value["add"], list_prefix="+") if has_add else None
)
remove_val = (
self._stringify_val(option_value["remove"], list_prefix="-") if has_remove else None
)
if has_add and has_remove:
return f"{add_val},{remove_val}"
if has_add:
return add_val
return remove_val
return self._stringify_val(option_value)

def options(self, section: str) -> List[str]:
section_values = self._find_section_values(section)
if section_values is None:
raise configparser.NoSectionError(section)
options = [
option
for option, option_value in section_values.items()
if self._is_an_option(option_value)
]
options.extend(self.defaults.keys())
return options

@property
def defaults(self) -> Mapping[str, str]:
return {
option: self._stringify_val(option_val, interpolate=False)
for option, option_val in self.values["DEFAULT"].items()
}


@dataclass(frozen=True)
class _EmptyConfig(Config):
"""A dummy config with no data at all."""
Expand Down Expand Up @@ -274,7 +474,7 @@ def sources(self) -> List[str]:
return [self.config_path]

def sections(self) -> List[str]:
return self.values.sections()
return self.values.sections

def has_section(self, section: str) -> bool:
return self.values.has_section(section)
Expand Down
Loading