Skip to content

Commit

Permalink
Refactor config helper to be cleaner (#1011)
Browse files Browse the repository at this point in the history
* Refactor config helper to be cleaner

Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com>

* Clean up docstrings

Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com>

---------

Signed-off-by: Jun Ki Min <42475935+loomlike@users.noreply.github.com>
  • Loading branch information
loomlike authored Jan 28, 2023
1 parent aac1d53 commit 453b97f
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 63 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pull_request_push_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ jobs:
SPARK_CONFIG__SPARK_CLUSTER: databricks
SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
Expand Down Expand Up @@ -311,4 +311,4 @@ jobs:
run: echo "NOW=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Notification
run: |
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ def submit_feathr_job(
spark_args = self._init_args(job_name=job_name, confs=cfg)
# Add additional repositories
spark_args.extend(["--repositories", "https://repository.mulesoft.org/nexus/content/repositories/public/,https://linkedin.jfrog.io/artifactory/open-source/"])
# spark_args.extend(["--repositories", "https://linkedin.jfrog.io/artifactory/open-source/"])


if not main_jar_path:
# We don't have the main jar, use Maven
if not python_files:
Expand Down Expand Up @@ -118,7 +117,7 @@ def submit_feathr_job(
if python_files.__len__() > 1:
spark_args.extend(["--py-files", ",".join(python_files[1:])])
spark_args.append(python_files[0])


if arguments:
spark_args.extend(arguments)
Expand Down
57 changes: 24 additions & 33 deletions feathr_project/feathr/utils/_env_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
class EnvConfigReader(object):
"""A utility class to read Feathr environment variables either from os environment variables,
the config yaml file or Azure Key Vault.
If a key is set in the environment variable, ConfigReader will return the value of that environment variable
unless use_env_vars set to False.
If a key is set in the environment variable, ConfigReader will return the value of that environment variable.
"""
akv_name: str = None # Azure Key Vault name to use for retrieving config values.
yaml_config: dict = None # YAML config file content.
Expand All @@ -21,7 +20,6 @@ def __init__(self, config_path: str):
Args:
config_path: Config file path.
use_env_vars (optional): Whether to use os environment variables instead of config file. Defaults to True.
"""
if config_path is not None:
config_path = Path(config_path)
Expand All @@ -37,7 +35,7 @@ def __init__(self, config_path: str):
def get(self, key: str, default: str = None) -> str:
"""Gets the Feathr config variable for the given key.
It will retrieve the value in the following order:
- From the environment variable if `use_env_vars == True` and the key is set in the os environment variables.
- From the environment variable if the key is set in the os environment variables.
- From the config yaml file if the key exists.
- From the Azure Key Vault.
If the key is not found in any of the above, it will return `default`.
Expand All @@ -49,24 +47,21 @@ def get(self, key: str, default: str = None) -> str:
Returns:
Feathr client's config value.
"""
res_env = self._get_variable_from_env(key)
res_file = (self._get_variable_from_file(key) if self.yaml_config else None)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)

# rewrite the logic below to make sure:
# First we have the order (i.e. res1 > res2 > res3 > default)
# Also previously we use OR for the result, which will yield a bug where say res1=None, res2=False, res3=None. Using OR will result to None result, although res2 actually have value
for res in [res_env, res_file, res_keyvault]:
if res is not None:
return res

logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Using default value which is {default}.")

return default
val = self._get_variable_from_env(key)
# `val` could be a boolean value, so we need to check if it is None.
if val is None and self.yaml_config is not None:
val = self._get_variable_from_file(key)
if val is None and self.akv_name is not None:
val = self._get_variable_from_akv(key)

if val is not None:
return val
else:
logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Returning the default value: {default}.")
return default

def get_from_env_or_akv(self, key: str) -> str:
"""Gets the Feathr config variable for the given key. This function ignores `use_env_vars` attribute and force to
look up environment variables or Azure Key Vault.
"""Gets the Feathr config variable for the given key.
It will retrieve the value in the following order:
- From the environment variable if the key is set in the os environment variables.
- From the Azure Key Vault.
Expand All @@ -78,24 +73,21 @@ def get_from_env_or_akv(self, key: str) -> str:
Returns:
Feathr client's config value.
"""
res_env = self._get_variable_from_env(key)
res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)
val = self._get_variable_from_env(key)
# `val` could be a boolean value, so we need to check if it is None.
if val is None and self.akv_name is not None:
val = self._get_variable_from_akv(key)

# rewrite the logic below to make sure:
# First we have the order (i.e. res1 > res2 > res3 > default)
# Also previously we use OR for the result, which will yield a bug where say res1=None, res2=False, res3=None. Using OR will result to None result, although res2 actually have value
for res in [res_env, res_keyvault]:
if res is not None:
return res

logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.")
return None
if val is not None:
return val
else:
logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.")
return None

def _get_variable_from_env(self, key: str) -> str:
# make it work for lower case and upper case.
conf_var = os.environ.get(key.lower(), os.environ.get(key.upper()))


return conf_var

def _get_variable_from_akv(self, key: str) -> str:
Expand All @@ -118,7 +110,6 @@ def _get_variable_from_file(self, key: str) -> str:
# make it work for lower case and upper case.
conf_var = conf_var.get(arg.lower(), conf_var.get(arg.upper()))


return conf_var
except Exception as e:
logger.warning(e)
Expand Down
67 changes: 42 additions & 25 deletions feathr_project/test/unit/utils/test_env_config_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,57 +15,74 @@
config:
key: '{TEST_CONFIG_FILE_VAL}'
"""
TEST_CONFIG_AKV_VAL = "test_akv_val"


@pytest.mark.parametrize(
"env_value, expected_value",
"env_value, config_file_content, akv_value, expected_value",
[
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(None, TEST_CONFIG_FILE_VAL),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_FILE_CONTENT, TEST_CONFIG_AKV_VAL, TEST_CONFIG_ENV_VAL),
(None, TEST_CONFIG_FILE_CONTENT, TEST_CONFIG_AKV_VAL, TEST_CONFIG_FILE_VAL),
(None, "", TEST_CONFIG_AKV_VAL, TEST_CONFIG_AKV_VAL),
(None, "", None, "default"),
]
)
def test__envvariableutil__get(
mocker: MockerFixture,
env_value: str,
config_file_content: str,
akv_value: str,
expected_value: str,
):
"""Test `get` method if it returns the correct value.
"""Test `get` method if it returns the expected value.
"""
if env_value:
mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value})

# Mock env variables
mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {
TEST_CONFIG_KEY: env_value,
"secrets__azure_key_vault__name": "test_akv_name",
})
# Mock AKS
mocker.patch.object(
feathr.utils._env_config_reader.AzureKeyVaultClient,
"get_feathr_akv_secret",
return_value=akv_value,
)
# Generate config file
f = NamedTemporaryFile(delete=True)
f.write(TEST_CONFIG_FILE_CONTENT.encode())
f.write(config_file_content.encode())
f.seek(0)
env_config = EnvConfigReader(config_path=f.name)
assert env_config.get(TEST_CONFIG_KEY) == expected_value

assert env_config.get(TEST_CONFIG_KEY, default="default") == expected_value


@pytest.mark.parametrize(
"env_value, expected_value",
"env_value, akv_value, expected_value",
[
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(None, None),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
(TEST_CONFIG_ENV_VAL, TEST_CONFIG_AKV_VAL, TEST_CONFIG_ENV_VAL),
(None, TEST_CONFIG_AKV_VAL, TEST_CONFIG_AKV_VAL),
(None, None, None),
]
)
def test__envvariableutil__get_from_env_or_akv(
mocker: MockerFixture,
env_value: str,
akv_value: str,
expected_value: str,
):
"""Test `get_from_env_or_akv` method if it returns the environment variable regardless of `use_env_vars` argument.
Args:
mocker (MockerFixture): _description_
env_value (str): _description_
expected_value (str): _description_
"""Test `get_from_env_or_akv` method if it returns the expected value.
"""
if env_value:
mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value})
# Mock env variables
mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {
TEST_CONFIG_KEY: env_value,
"secrets__azure_key_vault__name": "test_akv_name",
})
# Mock AKS
mocker.patch.object(
feathr.utils._env_config_reader.AzureKeyVaultClient,
"get_feathr_akv_secret",
return_value=akv_value,
)

f = NamedTemporaryFile(delete=True)
f.write(TEST_CONFIG_FILE_CONTENT.encode())
f.seek(0)
env_config = EnvConfigReader(config_path=f.name)
env_config = EnvConfigReader(config_path="")
assert env_config.get_from_env_or_akv(TEST_CONFIG_KEY) == expected_value

0 comments on commit 453b97f

Please sign in to comment.