Refactor config helper to be cleaner #1011

Merged
2 commits merged on Jan 28, 2023
Changes from 1 commit
4 changes: 2 additions & 2 deletions .github/workflows/pull_request_push_test.yml
@@ -108,7 +108,7 @@ jobs:
SPARK_CONFIG__SPARK_CLUSTER: databricks
SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
@@ -311,4 +311,4 @@ jobs:
run: echo "NOW=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Notification
run: |
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Gradle Test ${{needs.gradle_test.result}}, 2. Python Lint Test ${{needs.python_lint.result}}, 3. Databricks Test ${{needs.databricks_test.result}}, 4. Synapse Test ${{needs.azure_synapse_test.result}} , 5. LOCAL SPARK TEST ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
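The SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE change above only bumps spark_version from 9.1.x-scala2.12 to 11.3.x-scala2.12; the FEATHR_FILL_IN markers are placeholders that the client fills in at submission time. As a rough, hypothetical sketch (not Feathr's actual submission code), substituting such placeholders could look like the following, where fill_databricks_template and all argument values are made-up names:

import json

def fill_databricks_template(template_str: str, run_name: str, jar_path: str,
                             main_class: str, parameters: list) -> dict:
    # Hypothetical helper: parse the JSON template and replace the
    # FEATHR_FILL_IN placeholders with concrete run settings.
    cfg = json.loads(template_str)
    cfg["run_name"] = run_name
    cfg["libraries"] = [{"jar": jar_path}]
    cfg["spark_jar_task"]["main_class_name"] = main_class
    cfg["spark_jar_task"]["parameters"] = parameters
    cfg["new_cluster"]["spark_conf"] = {}  # real Spark conf key/values would go here
    return cfg

template = ('{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12",'
            '"num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"}},'
            '"libraries":[{"jar":"FEATHR_FILL_IN"}],'
            '"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}')
print(fill_databricks_template(template, "feathr_ci_run", "dbfs:/feathr/feathr-assembly.jar",
                               "com.example.Main", ["--join-config", "feature_join.conf"]))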
(Diff for a second file; the filename is not shown.)
@@ -86,8 +86,7 @@ def submit_feathr_job(
        spark_args = self._init_args(job_name=job_name, confs=cfg)
        # Add additional repositories
        spark_args.extend(["--repositories", "https://repository.mulesoft.org/nexus/content/repositories/public/,https://linkedin.jfrog.io/artifactory/open-source/"])
        # spark_args.extend(["--repositories", "https://linkedin.jfrog.io/artifactory/open-source/"])


        if not main_jar_path:
            # We don't have the main jar, use Maven
            if not python_files:
@@ -118,7 +117,7 @@ def submit_feathr_job(
            if python_files.__len__() > 1:
                spark_args.extend(["--py-files", ",".join(python_files[1:])])
            spark_args.append(python_files[0])


        if arguments:
            spark_args.extend(arguments)
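For context, the spark_args list assembled above is ultimately passed to a spark-submit style invocation. The sketch below (hypothetical names, simplified structure, not the actual Feathr launcher) mirrors the argument-building pattern shown in this diff:

from typing import List, Optional

def build_spark_args(job_name: str,
                     python_files: Optional[List[str]] = None,
                     arguments: Optional[List[str]] = None) -> List[str]:
    # Simplified mirror of the diff above: extra Maven repositories first,
    # then --py-files for any additional Python files, then the entry script,
    # and finally the job's own arguments.
    spark_args = ["--name", job_name]
    spark_args.extend([
        "--repositories",
        "https://repository.mulesoft.org/nexus/content/repositories/public/,"
        "https://linkedin.jfrog.io/artifactory/open-source/",
    ])
    if python_files:
        if len(python_files) > 1:
            spark_args.extend(["--py-files", ",".join(python_files[1:])])
        spark_args.append(python_files[0])
    if arguments:
        spark_args.extend(arguments)
    return spark_args

# e.g. ['--name', 'demo', '--repositories', '...', '--py-files', 'util.py', 'main.py', '--job-conf', 'x=y']
print(build_spark_args("demo", ["main.py", "util.py"], ["--job-conf", "x=y"]))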
46 changes: 19 additions & 27 deletions feathr_project/feathr/utils/_env_config_reader.py
@@ -49,20 +49,17 @@ def get(self, key: str, default: str = None) -> str:
        Returns:
            Feathr client's config value.
        """
        res_env = self._get_variable_from_env(key)
        res_file = (self._get_variable_from_file(key) if self.yaml_config else None)
        res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)

        # Rewrite the logic below to make sure:
        # First, we keep the priority order (i.e. res1 > res2 > res3 > default).
        # Also, we previously used OR to combine the results, which has a bug: if res1=None, res2=False, and res3=None, OR yields None even though res2 actually has a value.
        for res in [res_env, res_file, res_keyvault]:
            if res is not None:
                return res

        logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Using default value which is {default}.")

        return default
        val = self._get_variable_from_env(key)
        if val is None and self.yaml_config is not None:
            val = self._get_variable_from_file(key)
        if val is None and self.akv_name is not None:
            val = self._get_variable_from_akv(key)

        if val is not None:
            return val
        else:
            logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Returning the default value: {default}.")
            return default

    def get_from_env_or_akv(self, key: str) -> str:
        """Gets the Feathr config variable for the given key. This function ignores `use_env_vars` attribute and force to
@@ -78,24 +75,20 @@ def get_from_env_or_akv(self, key: str) -> str:
        Returns:
            Feathr client's config value.
        """
        res_env = self._get_variable_from_env(key)
        res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None)
        val = self._get_variable_from_env(key)
        if val is None and self.akv_name is not None:
            val = self._get_variable_from_akv(key)

        # Rewrite the logic below to make sure:
        # First, we keep the priority order (i.e. res1 > res2 > res3 > default).
        # Also, we previously used OR to combine the results, which has a bug: if res1=None, res2=False, and res3=None, OR yields None even though res2 actually has a value.
        for res in [res_env, res_keyvault]:
            if res is not None:
                return res

        logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.")
        return None
        if val is not None:
            return val
        else:
            logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.")
            return None

    def _get_variable_from_env(self, key: str) -> str:
        # make it work for lower case and upper case.
        conf_var = os.environ.get(key.lower(), os.environ.get(key.upper()))


        return conf_var

    def _get_variable_from_akv(self, key: str) -> str:
@@ -118,7 +111,6 @@ def _get_variable_from_file(self, key: str) -> str:
                # make it work for lower case and upper case.
                conf_var = conf_var.get(arg.lower(), conf_var.get(arg.upper()))


            return conf_var
        except Exception as e:
            logger.warning(e)
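The refactored reader keeps a strict precedence of environment variable > YAML config file > Azure Key Vault > default, and it checks `is not None` rather than chaining `or`, so a falsy but valid value (for example `False` or an empty string) from a higher-priority source is not skipped, which is exactly the bug the removed comments describe. A minimal standalone sketch of that resolution rule, independent of the Feathr classes above:

def resolve(env_val, file_val, akv_val, default=None):
    # First non-None value wins, in priority order:
    # environment variable > config file > key vault > default.
    for val in (env_val, file_val, akv_val):
        if val is not None:
            return val
    return default

assert resolve("from_env", "from_file", "from_akv") == "from_env"
assert resolve(None, False, "from_akv") is False          # falsy file value still wins
assert (None or False or "from_akv") == "from_akv"        # the 'or'-chaining bug being avoided
assert resolve(None, None, None, default="fallback") == "fallback"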
67 changes: 42 additions & 25 deletions feathr_project/test/unit/utils/test_env_config_reader.py
@@ -15,57 +15,74 @@
config:
    key: '{TEST_CONFIG_FILE_VAL}'
"""
TEST_CONFIG_AKV_VAL = "test_akv_val"


@pytest.mark.parametrize(
    "env_value, expected_value",
    "env_value, config_file_content, akv_value, expected_value",
    [
        (TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
        (None, TEST_CONFIG_FILE_VAL),
        (TEST_CONFIG_ENV_VAL, TEST_CONFIG_FILE_CONTENT, TEST_CONFIG_AKV_VAL, TEST_CONFIG_ENV_VAL),
        (None, TEST_CONFIG_FILE_CONTENT, TEST_CONFIG_AKV_VAL, TEST_CONFIG_FILE_VAL),
        (None, "", TEST_CONFIG_AKV_VAL, TEST_CONFIG_AKV_VAL),
        (None, "", None, "default"),
    ]
)
def test__envvariableutil__get(
    mocker: MockerFixture,
    env_value: str,
    config_file_content: str,
    akv_value: str,
    expected_value: str,
):
    """Test `get` method if it returns the correct value.
    """Test `get` method if it returns the expected value.
    """
    if env_value:
        mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value})

    # Mock env variables
    mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {
        TEST_CONFIG_KEY: env_value,
        "secrets__azure_key_vault__name": "test_akv_name",
    })
    # Mock AKV (Azure Key Vault) client
    mocker.patch.object(
        feathr.utils._env_config_reader.AzureKeyVaultClient,
        "get_feathr_akv_secret",
        return_value=akv_value,
    )
    # Generate config file
    f = NamedTemporaryFile(delete=True)
    f.write(TEST_CONFIG_FILE_CONTENT.encode())
    f.write(config_file_content.encode())
    f.seek(0)
    env_config = EnvConfigReader(config_path=f.name)
    assert env_config.get(TEST_CONFIG_KEY) == expected_value

    assert env_config.get(TEST_CONFIG_KEY, default="default") == expected_value


@pytest.mark.parametrize(
    "env_value, expected_value",
    "env_value, akv_value, expected_value",
    [
        (TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL),
        (None, None),
        (TEST_CONFIG_ENV_VAL, TEST_CONFIG_AKV_VAL, TEST_CONFIG_ENV_VAL),
        (None, TEST_CONFIG_AKV_VAL, TEST_CONFIG_AKV_VAL),
        (None, None, None),
    ]
)
def test__envvariableutil__get_from_env_or_akv(
    mocker: MockerFixture,
    env_value: str,
    akv_value: str,
    expected_value: str,
):
    """Test `get_from_env_or_akv` method if it returns the environment variable regardless of `use_env_vars` argument.

    Args:
        mocker (MockerFixture): _description_
        env_value (str): _description_
        expected_value (str): _description_
    """Test `get_from_env_or_akv` method if it returns the expected value.
    """
    if env_value:
        mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value})
    # Mock env variables
    mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {
        TEST_CONFIG_KEY: env_value,
        "secrets__azure_key_vault__name": "test_akv_name",
    })
    # Mock AKV (Azure Key Vault) client
    mocker.patch.object(
        feathr.utils._env_config_reader.AzureKeyVaultClient,
        "get_feathr_akv_secret",
        return_value=akv_value,
    )

    f = NamedTemporaryFile(delete=True)
    f.write(TEST_CONFIG_FILE_CONTENT.encode())
    f.seek(0)
    env_config = EnvConfigReader(config_path=f.name)
    env_config = EnvConfigReader(config_path="")
    assert env_config.get_from_env_or_akv(TEST_CONFIG_KEY) == expected_value
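
Based only on the constructor and methods exercised in these tests, here is a minimal usage sketch of the refactored reader; the config path and default values are illustrative, and the key is the one used in the workflow env block earlier in this PR:

import os
from feathr.utils._env_config_reader import EnvConfigReader

# Any Feathr config key resolves with the precedence shown in the tests:
# environment variable > YAML config file > Azure Key Vault > default.
os.environ["SPARK_CONFIG__SPARK_CLUSTER"] = "databricks"

reader = EnvConfigReader(config_path="feathr_config.yaml")  # illustrative path; the tests also pass "" or a temp file
print(reader.get("spark_config__spark_cluster", default="local"))   # -> "databricks"
print(reader.get("spark_config__nonexistent_key", default="n/a"))   # -> "n/a" (after an info log)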