Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow users to pass credentials through environment variables #2178

Merged
merged 20 commits into main from feat/allow-credentials-via-env-variables
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d0cbcb8
Add failing test
jmholzer Jan 6, 2023
c0d784f
Add in-place environment variable resolution
jmholzer Jan 6, 2023
74da5fc
Add test to ensure oc.env resolver is cleared after reading credentials
jmholzer Jan 6, 2023
3149eeb
Refactor docstring on _resolve_environment_variables
jmholzer Jan 6, 2023
d07cb3b
Move credentials.yml creation to fixture
jmholzer Jan 9, 2023
26aa770
Modify _resolve_environment_variables to only clear the oc.env resolv…
jmholzer Jan 9, 2023
238ce4d
Move environment variable resolution to load_and_merge_dir_config
jmholzer Jan 9, 2023
fc7f81e
Add read_environment_variables to load_and_merge_dir_config docstring
jmholzer Jan 9, 2023
606d370
Merge branch 'main' into feat/allow-credentials-via-env-variables
jmholzer Jan 9, 2023
6cf50d3
Merge branch 'main' into feat/allow-credentials-via-env-variables
jmholzer Jan 9, 2023
bc96421
Merge branch 'main' into feat/allow-credentials-via-env-variables
jmholzer Jan 10, 2023
b3bbddc
Merge branch 'main' into feat/allow-credentials-via-env-variables
jmholzer Jan 10, 2023
e8fac8e
Add test for env resolver not being used when config key is not 'cred…
jmholzer Jan 10, 2023
5ab867c
Merge branch 'feat/allow-credentials-via-env-variables' of github.com…
jmholzer Jan 10, 2023
c3ca516
Lint
jmholzer Jan 10, 2023
ca83b47
Add release note
jmholzer Jan 10, 2023
6e3c7c8
Refactor _resolve_environment_variables to remove unnecessary logic c…
jmholzer Jan 10, 2023
800df92
Add test_env_resolver_is_registered_after_loading
jmholzer Jan 10, 2023
7e6727f
Fix test_env_resolver_is_registered_after_loading
jmholzer Jan 10, 2023
4060639
Merge branch 'main' into feat/allow-credentials-via-env-variables
jmholzer Jan 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* Added support for generator functions as nodes, i.e. using `yield` instead of return.
* Enable chunk-wise processing in nodes with generator functions.
* Save node outputs after every `yield` before proceeding with next chunk.
* Added support for loading credentials from environment variables using OmegaConfLoader.

## Bug fixes and other changes
* Commas surrounded by square brackets (only possible for nodes with default names) will no longer split the arguments to `kedro run` options which take a list of nodes as inputs (`--from-nodes` and `--to-nodes`).
Expand Down
39 changes: 35 additions & 4 deletions kedro/config/omegaconf_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import logging
from glob import iglob
from pathlib import Path
from typing import Any, Dict, Iterable, List, Set # noqa
from typing import Any, Dict, Iterable, List, Optional, Set # noqa

from omegaconf import OmegaConf
from omegaconf.resolvers import oc
from yaml.parser import ParserError
from yaml.scanner import ScannerError

Expand Down Expand Up @@ -143,15 +144,21 @@ def __getitem__(self, key) -> Dict[str, Any]:
)
patterns = [*self.config_patterns[key]]

read_environment_variables = key == "credentials"

# Load base env config
base_path = str(Path(self.conf_source) / self.base_env)
base_config = self.load_and_merge_dir_config(base_path, patterns)
base_config = self.load_and_merge_dir_config(
base_path, patterns, read_environment_variables
)
config = base_config

# Load chosen env config
run_env = self.env or self.default_run_env
env_path = str(Path(self.conf_source) / run_env)
env_config = self.load_and_merge_dir_config(env_path, patterns)
env_config = self.load_and_merge_dir_config(
env_path, patterns, read_environment_variables
)

# Destructively merge the two env dirs. The chosen env will override base.
common_keys = config.keys() & env_config.keys()
Expand All @@ -178,13 +185,19 @@ def __repr__(self): # pragma: no cover
f"config_patterns={self.config_patterns})"
)

def load_and_merge_dir_config(self, conf_path: str, patterns: Iterable[str]):
def load_and_merge_dir_config(
self,
conf_path: str,
patterns: Iterable[str],
read_environment_variables: Optional[bool] = False,
) -> Dict[str, Any]:
"""Recursively load and merge all configuration files in a directory using OmegaConf,
which satisfy a given list of glob patterns from a specific path.

Args:
conf_path: Path to configuration directory.
patterns: List of glob patterns to match the filenames against.
read_environment_variables: Whether to resolve environment variables.

Raises:
MissingConfigException: If configuration path doesn't exist or isn't valid.
Expand Down Expand Up @@ -216,6 +229,8 @@ def load_and_merge_dir_config(self, conf_path: str, patterns: Iterable[str]):
for config_filepath in config_files_filtered:
try:
config = OmegaConf.load(config_filepath)
if read_environment_variables:
self._resolve_environment_variables(config)
config_per_file[config_filepath] = config
except (ParserError, ScannerError) as exc:
line = exc.problem_mark.line # type: ignore
Expand Down Expand Up @@ -266,6 +281,22 @@ def _check_duplicates(seen_files_to_keys: Dict[Path, Set[Any]]):
dup_str = "\n".join(duplicates)
raise ValueError(f"{dup_str}")

@staticmethod
def _resolve_environment_variables(config: Dict[str, Any]) -> None:
    """Use the ``oc.env`` resolver to read environment variables and replace
    them in-place, clearing the resolver after the operation is complete if
    it was not registered beforehand.

    The resolver is cleared even when resolution raises, so a failed load
    does not leave ``oc.env`` registered.

    Args:
        config: The configuration dictionary to resolve.
    """
    # OmegaConf keeps resolvers in global state, so remember whether this
    # call is the one that registers ``oc.env`` and must remove it again.
    registered_here = not OmegaConf.has_resolver("oc.env")
    if registered_here:
        # "oc.env" is the name OmegaConf itself assigns to this resolver.
        OmegaConf.register_new_resolver("oc.env", oc.env)
    try:
        OmegaConf.resolve(config)
    finally:
        if registered_here:
            OmegaConf.clear_resolver("oc.env")
@staticmethod
def _clear_omegaconf_resolvers():
"""Clear the built-in OmegaConf resolvers."""
Expand Down
64 changes: 64 additions & 0 deletions tests/config/test_omegaconf_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# pylint: disable=expression-not-assigned, pointless-statement
import configparser
import json
import os
import re
from pathlib import Path
from typing import Dict

import pytest
import yaml
from omegaconf import OmegaConf, errors
from omegaconf.resolvers import oc
from yaml.parser import ParserError

from kedro.config import MissingConfigException, OmegaConfLoader
Expand Down Expand Up @@ -86,8 +89,26 @@ def proj_catalog_nested(tmp_path):
_write_yaml(path, {"nested": {"type": "MemoryDataSet"}})


@pytest.fixture
def proj_catalog_env_variable(tmp_path):
    """Write a nested catalog file under the ``_BASE_ENV`` directory whose
    ``file_path`` entry is an ``oc.env`` interpolation of ``TEST_FILE_PATH``."""
    catalog_entry = {"test": {"file_path": "${oc.env:TEST_FILE_PATH}"}}
    nested_catalog = tmp_path / _BASE_ENV / "catalog" / "dir" / "nested.yml"
    _write_yaml(nested_catalog, catalog_entry)


@pytest.fixture
def proj_credentials_env_variable(tmp_path):
    """Write a ``credentials.yml`` under the ``_DEFAULT_RUN_ENV`` directory whose
    user name and key are ``oc.env`` interpolations of ``TEST_USERNAME`` and
    ``TEST_KEY``."""
    user_credentials = {
        "name": "${oc.env:TEST_USERNAME}",
        "key": "${oc.env:TEST_KEY}",
    }
    credentials_file = tmp_path / _DEFAULT_RUN_ENV / "credentials.yml"
    _write_yaml(credentials_file, {"user": user_credentials})


# Reusable ``pytest.mark.usefixtures`` marks: decorating a test with one of these
# requests the named fixture without adding it to the test's argument list.
use_config_dir = pytest.mark.usefixtures("create_config_dir")
use_proj_catalog = pytest.mark.usefixtures("proj_catalog")
# Marks for the fixtures that write config files containing ``oc.env`` interpolations.
use_credentials_env_variable_yml = pytest.mark.usefixtures(
"proj_credentials_env_variable"
)
use_catalog_env_variable_yml = pytest.mark.usefixtures("proj_catalog_env_variable")


class TestOmegaConfLoader:
Expand Down Expand Up @@ -421,3 +442,46 @@ def test_bypass_catalog_config_loading(self, tmp_path):
conf["catalog"] = {"catalog_config": "something_new"}

assert conf["catalog"] == {"catalog_config": "something_new"}

@use_config_dir
@use_credentials_env_variable_yml
def test_load_credentials_from_env_variables(self, tmp_path, monkeypatch):
    """Load credentials from environment variables"""
    conf = OmegaConfLoader(str(tmp_path))
    # monkeypatch restores the environment after the test, so the variables
    # do not leak into other tests.
    monkeypatch.setenv("TEST_USERNAME", "test_user")
    monkeypatch.setenv("TEST_KEY", "test_key")
    assert conf["credentials"]["user"]["name"] == "test_user"
    assert conf["credentials"]["user"]["key"] == "test_key"

@use_config_dir
@use_catalog_env_variable_yml
def test_env_resolver_not_used_for_catalog(self, tmp_path, monkeypatch):
    """Check that the oc.env resolver is not used for catalog loading"""
    conf = OmegaConfLoader(str(tmp_path))
    # Set the variable the catalog fixture actually interpolates, so the only
    # reason the lookup can fail is the resolver not being available.
    monkeypatch.setenv("TEST_FILE_PATH", "test_file_path")
    with pytest.raises(errors.UnsupportedInterpolationType):
        conf["catalog"]["test"]["file_path"]

@use_config_dir
@use_credentials_env_variable_yml
def test_env_resolver_is_cleared_after_loading(self, tmp_path, monkeypatch):
    """Check that the ``oc.env`` resolver is cleared after loading credentials
    in the case that it was not registered beforehand."""
    conf = OmegaConfLoader(str(tmp_path))
    # monkeypatch restores the environment after the test.
    monkeypatch.setenv("TEST_USERNAME", "test_user")
    monkeypatch.setenv("TEST_KEY", "test_key")
    assert conf["credentials"]["user"]["name"] == "test_user"
    assert not OmegaConf.has_resolver("oc.env")

@use_config_dir
@use_credentials_env_variable_yml
def test_env_resolver_is_registered_after_loading(self, tmp_path, monkeypatch):
    """Check that the ``oc.env`` resolver is registered after loading credentials
    in the case that it was registered beforehand"""
    conf = OmegaConfLoader(str(tmp_path))
    OmegaConf.register_new_resolver("oc.env", oc.env)
    try:
        # monkeypatch restores the environment after the test.
        monkeypatch.setenv("TEST_USERNAME", "test_user")
        monkeypatch.setenv("TEST_KEY", "test_key")
        assert conf["credentials"]["user"]["name"] == "test_user"
        assert OmegaConf.has_resolver("oc.env")
    finally:
        # Resolvers are global to OmegaConf: always remove the one registered
        # above, even if an assertion fails, so other tests are unaffected.
        OmegaConf.clear_resolver("oc.env")