Skip to content

Commit

Permalink
Allow users to pass credentials through environment variables (#2178)
Browse files Browse the repository at this point in the history
* Add failing test

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add in-place environment variable resolution

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add test to ensure oc.env resolver is cleared after reading credentials

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Refactor docstring on _resolve_environment_variables

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Move credentials.yml creation to fixture

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Modify _resolve_environment_variables to only clear the oc.env resolver if it was not registered

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Move environment variable resolution to load_and_merge_dir_config

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add read_environment_variables to load_and_merge_dir_config docstring

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add test for env resolver not being used when config key is not 'credentials'

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Lint

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add release note

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Refactor _resolve_environment_variables to remove unnecessary logic control flow variable

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Add test_env_resolver_is_registered_after_loading

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

* Fix test_env_resolver_is_registered_after_loading

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>

Signed-off-by: Jannic Holzer <jannic.holzer@quantumblack.com>
  • Loading branch information
jmholzer authored Jan 13, 2023
1 parent 3d01345 commit e126891
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 4 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* Added support for generator functions as nodes, i.e. using `yield` instead of return.
* Enable chunk-wise processing in nodes with generator functions.
* Save node outputs after every `yield` before proceeding with next chunk.
* Added support for loading credentials from environment variables using OmegaConfLoader.

## Bug fixes and other changes
* Commas surrounded by square brackets (only possible for nodes with default names) will no longer split the arguments to `kedro run` options which take a list of nodes as inputs (`--from-nodes` and `--to-nodes`).
Expand Down
39 changes: 35 additions & 4 deletions kedro/config/omegaconf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import logging
from glob import iglob
from pathlib import Path
from typing import Any, Dict, Iterable, List, Set # noqa
from typing import Any, Dict, Iterable, List, Optional, Set # noqa

from omegaconf import OmegaConf
from omegaconf.resolvers import oc
from yaml.parser import ParserError
from yaml.scanner import ScannerError

Expand Down Expand Up @@ -143,15 +144,21 @@ def __getitem__(self, key) -> Dict[str, Any]:
)
patterns = [*self.config_patterns[key]]

read_environment_variables = key == "credentials"

# Load base env config
base_path = str(Path(self.conf_source) / self.base_env)
base_config = self.load_and_merge_dir_config(base_path, patterns)
base_config = self.load_and_merge_dir_config(
base_path, patterns, read_environment_variables
)
config = base_config

# Load chosen env config
run_env = self.env or self.default_run_env
env_path = str(Path(self.conf_source) / run_env)
env_config = self.load_and_merge_dir_config(env_path, patterns)
env_config = self.load_and_merge_dir_config(
env_path, patterns, read_environment_variables
)

# Destructively merge the two env dirs. The chosen env will override base.
common_keys = config.keys() & env_config.keys()
Expand All @@ -178,13 +185,19 @@ def __repr__(self): # pragma: no cover
f"config_patterns={self.config_patterns})"
)

def load_and_merge_dir_config(self, conf_path: str, patterns: Iterable[str]):
def load_and_merge_dir_config(
self,
conf_path: str,
patterns: Iterable[str],
read_environment_variables: Optional[bool] = False,
) -> Dict[str, Any]:
"""Recursively load and merge all configuration files in a directory using OmegaConf,
which satisfy a given list of glob patterns from a specific path.
Args:
conf_path: Path to configuration directory.
patterns: List of glob patterns to match the filenames against.
read_environment_variables: Whether to resolve environment variables.
Raises:
MissingConfigException: If configuration path doesn't exist or isn't valid.
Expand Down Expand Up @@ -216,6 +229,8 @@ def load_and_merge_dir_config(self, conf_path: str, patterns: Iterable[str]):
for config_filepath in config_files_filtered:
try:
config = OmegaConf.load(config_filepath)
if read_environment_variables:
self._resolve_environment_variables(config)
config_per_file[config_filepath] = config
except (ParserError, ScannerError) as exc:
line = exc.problem_mark.line # type: ignore
Expand Down Expand Up @@ -266,6 +281,22 @@ def _check_duplicates(seen_files_to_keys: Dict[Path, Set[Any]]):
dup_str = "\n".join(duplicates)
raise ValueError(f"{dup_str}")

@staticmethod
def _resolve_environment_variables(config: Dict[str, Any]) -> None:
"""Use the ``oc.env`` resolver to read environment variables and replace
them in-place, clearing the resolver after the operation is complete if
it was not registered beforehand.
Arguments:
config {Dict[str, Any]} -- The configuration dictionary to resolve.
"""
if not OmegaConf.has_resolver("oc.env"):
OmegaConf.register_new_resolver("oc.env", oc.env)
OmegaConf.resolve(config)
OmegaConf.clear_resolver("oc.env")
else:
OmegaConf.resolve(config)

@staticmethod
def _clear_omegaconf_resolvers():
"""Clear the built-in OmegaConf resolvers."""
Expand Down
64 changes: 64 additions & 0 deletions tests/config/test_omegaconf_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# pylint: disable=expression-not-assigned, pointless-statement
import configparser
import json
import os
import re
from pathlib import Path
from typing import Dict

import pytest
import yaml
from omegaconf import OmegaConf, errors
from omegaconf.resolvers import oc
from yaml.parser import ParserError

from kedro.config import MissingConfigException, OmegaConfLoader
Expand Down Expand Up @@ -86,8 +89,26 @@ def proj_catalog_nested(tmp_path):
_write_yaml(path, {"nested": {"type": "MemoryDataSet"}})


@pytest.fixture
def proj_catalog_env_variable(tmp_path):
path = tmp_path / _BASE_ENV / "catalog" / "dir" / "nested.yml"
_write_yaml(path, {"test": {"file_path": "${oc.env:TEST_FILE_PATH}"}})


@pytest.fixture
def proj_credentials_env_variable(tmp_path):
path = tmp_path / _DEFAULT_RUN_ENV / "credentials.yml"
_write_yaml(
path, {"user": {"name": "${oc.env:TEST_USERNAME}", "key": "${oc.env:TEST_KEY}"}}
)


use_config_dir = pytest.mark.usefixtures("create_config_dir")
use_proj_catalog = pytest.mark.usefixtures("proj_catalog")
use_credentials_env_variable_yml = pytest.mark.usefixtures(
"proj_credentials_env_variable"
)
use_catalog_env_variable_yml = pytest.mark.usefixtures("proj_catalog_env_variable")


class TestOmegaConfLoader:
Expand Down Expand Up @@ -421,3 +442,46 @@ def test_bypass_catalog_config_loading(self, tmp_path):
conf["catalog"] = {"catalog_config": "something_new"}

assert conf["catalog"] == {"catalog_config": "something_new"}

@use_config_dir
@use_credentials_env_variable_yml
def test_load_credentials_from_env_variables(self, tmp_path):
"""Load credentials from environment variables"""
conf = OmegaConfLoader(str(tmp_path))
os.environ["TEST_USERNAME"] = "test_user"
os.environ["TEST_KEY"] = "test_key"
assert conf["credentials"]["user"]["name"] == "test_user"
assert conf["credentials"]["user"]["key"] == "test_key"

@use_config_dir
@use_catalog_env_variable_yml
def test_env_resolver_not_used_for_catalog(self, tmp_path):
"""Check that the oc.env resolver is not used for catalog loading"""
conf = OmegaConfLoader(str(tmp_path))
os.environ["TEST_DATASET"] = "test_dataset"
with pytest.raises(errors.UnsupportedInterpolationType):
conf["catalog"]["test"]["file_path"]

@use_config_dir
@use_credentials_env_variable_yml
def test_env_resolver_is_cleared_after_loading(self, tmp_path):
"""Check that the ``oc.env`` resolver is cleared after loading credentials
in the case that it was not registered beforehand."""
conf = OmegaConfLoader(str(tmp_path))
os.environ["TEST_USERNAME"] = "test_user"
os.environ["TEST_KEY"] = "test_key"
assert conf["credentials"]["user"]["name"] == "test_user"
assert not OmegaConf.has_resolver("oc.env")

@use_config_dir
@use_credentials_env_variable_yml
def test_env_resolver_is_registered_after_loading(self, tmp_path):
"""Check that the ``oc.env`` resolver is registered after loading credentials
in the case that it was registered beforehand"""
conf = OmegaConfLoader(str(tmp_path))
OmegaConf.register_new_resolver("oc.env", oc.env)
os.environ["TEST_USERNAME"] = "test_user"
os.environ["TEST_KEY"] = "test_key"
assert conf["credentials"]["user"]["name"] == "test_user"
assert OmegaConf.has_resolver("oc.env")
OmegaConf.clear_resolver("oc.env")

0 comments on commit e126891

Please sign in to comment.