diff --git a/docs/samples/feature_embedding.ipynb b/docs/samples/feature_embedding.ipynb index 27498b1f5..468eb1250 100644 --- a/docs/samples/feature_embedding.ipynb +++ b/docs/samples/feature_embedding.ipynb @@ -306,7 +306,9 @@ " },\n", " \"new_cluster\": databricks_cluster_config,\n", "}\n", + "os.environ[\"SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE\"] = json.dumps(databricks_config)\n", "\n", + "# Note, config arguments maybe overridden by the environment variables. \n", "config_path = generate_config(\n", " resource_prefix=RESOURCE_PREFIX,\n", " project_name=PROJECT_NAME,\n", @@ -316,7 +318,6 @@ " databricks_workspace_token_value=DATABRICKS_WORKSPACE_TOKEN_VALUE,\n", " spark_config__databricks__work_dir=f\"dbfs:/{PROJECT_NAME}\",\n", " spark_config__databricks__workspace_instance_url=SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL,\n", - " spark_config__databricks__config_template=json.dumps(databricks_config),\n", " feature_registry__api_endpoint=REGISTRY_ENDPOINT,\n", ")\n", "\n", @@ -341,7 +342,6 @@ "client = FeathrClient(\n", " config_path=config_path,\n", " credential=credential,\n", - " use_env_vars=False,\n", ")" ] }, @@ -808,7 +808,7 @@ "widgets": {} }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -822,11 +822,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6 (default, Oct 18 2022, 12:41:40) \n[Clang 14.0.0 (clang-1400.0.29.202)]" + "version": "3.10.9" }, "vscode": { "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + "hash": "e34a1a57d2e174682770a82d94a178aa36d3ccfaa21227c5d2308e319b7ae532" } } }, diff --git a/feathr_project/feathr/client.py b/feathr_project/feathr/client.py index eb5e4ca80..ac41af8b6 100644 --- a/feathr_project/feathr/client.py +++ b/feathr_project/feathr/client.py @@ -65,22 +65,24 @@ def __init__( local_workspace_dir: str = None, credential: Any = None, project_registry_tag: Dict[str, str] = None, - use_env_vars: bool = True, ): """Initialize Feathr Client. + Configuration values used by the Feathr are evaluated in the following precedence, with items higher on the list taking priority. + 1. Environment variables + 2. Values in the configuration file + 3. Values in the Azure Key Vault Args: config_path (optional): Config yaml file path. See [Feathr Config Template](https://github.com/feathr-ai/feathr/blob/main/feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml) for more details. Defaults to "./feathr_config.yaml". local_workspace_dir (optional): Set where is the local work space dir. If not set, Feathr will create a temporary folder to store local workspace related files. credential (optional): Azure credential to access cloud resources, most likely to be the returned result of DefaultAzureCredential(). If not set, Feathr will initialize DefaultAzureCredential() inside the __init__ function to get credentials. project_registry_tag (optional): Adding tags for project in Feathr registry. This might be useful if you want to tag your project as deprecated, or allow certain customizations on project level. Default is empty - use_env_vars (optional): Whether to use environment variables to set up the client. If set to False, the client will not use environment variables to set up the client. Defaults to True. """ self.logger = logging.getLogger(__name__) # Redis key separator self._KEY_SEPARATOR = ':' self._COMPOSITE_KEY_SEPARATOR = '#' - self.env_config = EnvConfigReader(config_path=config_path, use_env_vars=use_env_vars) + self.env_config = EnvConfigReader(config_path=config_path) if local_workspace_dir: self.local_workspace_dir = local_workspace_dir else: @@ -322,7 +324,7 @@ def get_online_features(self, feature_table: str, key: Any, feature_names: List[ Args: feature_table: the name of the feature table. - key: the key/key list of the entity; + key: the key/key list of the entity; for key list, please make sure the order is consistent with the one in feature's definition; the order can be found by 'get_features_from_registry'. feature_names: list of feature names to fetch diff --git a/feathr_project/feathr/utils/_env_config_reader.py b/feathr_project/feathr/utils/_env_config_reader.py index fd4555433..7c09ba6a5 100644 --- a/feathr_project/feathr/utils/_env_config_reader.py +++ b/feathr_project/feathr/utils/_env_config_reader.py @@ -16,7 +16,7 @@ class EnvConfigReader(object): akv_name: str = None # Azure Key Vault name to use for retrieving config values. yaml_config: dict = None # YAML config file content. - def __init__(self, config_path: str, use_env_vars: bool = True): + def __init__(self, config_path: str): """Initialize the utility class. Args: @@ -31,8 +31,6 @@ def __init__(self, config_path: str, use_env_vars: bool = True): except yaml.YAMLError as e: logger.warning(e) - self.use_env_vars = use_env_vars - self.akv_name = self.get("secrets__azure_key_vault__name") self.akv_client = AzureKeyVaultClient(self.akv_name) if self.akv_name else None @@ -51,9 +49,9 @@ def get(self, key: str, default: str = None) -> str: Returns: Feathr client's config value. """ - res_env = (self._get_variable_from_env(key) if self.use_env_vars else None) - res_file = (self._get_variable_from_file(key) if self.yaml_config else None) - res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None) + res_env = self._get_variable_from_env(key) + res_file = (self._get_variable_from_file(key) if self.yaml_config else None) + res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None) # rewrite the logic below to make sure: # First we have the order (i.e. res1 > res2 > res3 > default) @@ -63,7 +61,7 @@ def get(self, key: str, default: str = None) -> str: return res logger.info(f"Config {key} is not found in the environment variable, configuration file, or the remote key value store. Using default value which is {default}.") - + return default def get_from_env_or_akv(self, key: str) -> str: @@ -80,8 +78,8 @@ def get_from_env_or_akv(self, key: str) -> str: Returns: Feathr client's config value. """ - res_env = (self._get_variable_from_env(key) if self.use_env_vars else None) - res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None) + res_env = self._get_variable_from_env(key) + res_keyvault = (self._get_variable_from_akv(key) if self.akv_name else None) # rewrite the logic below to make sure: # First we have the order (i.e. res1 > res2 > res3 > default) @@ -92,8 +90,6 @@ def get_from_env_or_akv(self, key: str) -> str: logger.warning(f"Config {key} is not found in the environment variable or the remote key value store.") return None - - def _get_variable_from_env(self, key: str) -> str: # make it work for lower case and upper case. diff --git a/feathr_project/feathr/utils/config.py b/feathr_project/feathr/utils/config.py index d8ce248bc..5f92ccb59 100644 --- a/feathr_project/feathr/utils/config.py +++ b/feathr_project/feathr/utils/config.py @@ -59,7 +59,8 @@ def generate_config( adls_key: str = None, **kwargs, ) -> str: - """Generate a feathr config yaml file. + """Generate a feathr config yaml file. Note, Feathr client will try to read environment variables first before read the config file. + See details from the FeathrClient docstrings. Note: This utility function assumes Azure resources are deployed using the Azure Resource Manager (ARM) template, diff --git a/feathr_project/test/unit/utils/test_env_config_reader.py b/feathr_project/test/unit/utils/test_env_config_reader.py index 98e591808..aa2c73902 100644 --- a/feathr_project/test/unit/utils/test_env_config_reader.py +++ b/feathr_project/test/unit/utils/test_env_config_reader.py @@ -18,21 +18,18 @@ @pytest.mark.parametrize( - "use_env_vars, env_value, expected_value", + "env_value, expected_value", [ - (True, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), - (True, None, TEST_CONFIG_FILE_VAL), - (False, TEST_CONFIG_ENV_VAL, TEST_CONFIG_FILE_VAL), + (TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), + (None, TEST_CONFIG_FILE_VAL), ] ) def test__envvariableutil__get( mocker: MockerFixture, - use_env_vars: bool, env_value: str, expected_value: str, ): - """Test `get` method if it returns the correct value - along with `use_env_vars` argument. + """Test `get` method if it returns the correct value. """ if env_value: mocker.patch.object(feathr.utils._env_config_reader.os, "environ", {TEST_CONFIG_KEY: env_value}) @@ -40,21 +37,20 @@ def test__envvariableutil__get( f = NamedTemporaryFile(delete=True) f.write(TEST_CONFIG_FILE_CONTENT.encode()) f.seek(0) - env_config = EnvConfigReader(config_path=f.name, use_env_vars=use_env_vars) + env_config = EnvConfigReader(config_path=f.name) assert env_config.get(TEST_CONFIG_KEY) == expected_value @pytest.mark.parametrize( - "use_env_vars, env_value, expected_value", + "env_value, expected_value", [ - (True, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), - (True, None, None), - (False, TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), + (TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), + (None, None), + (TEST_CONFIG_ENV_VAL, TEST_CONFIG_ENV_VAL), ] ) def test__envvariableutil__get_from_env_or_akv( mocker: MockerFixture, - use_env_vars: bool, env_value: str, expected_value: str, ): @@ -62,7 +58,6 @@ def test__envvariableutil__get_from_env_or_akv( Args: mocker (MockerFixture): _description_ - use_env_vars (bool): _description_ env_value (str): _description_ expected_value (str): _description_ """ @@ -72,5 +67,5 @@ def test__envvariableutil__get_from_env_or_akv( f = NamedTemporaryFile(delete=True) f.write(TEST_CONFIG_FILE_CONTENT.encode()) f.seek(0) - env_config = EnvConfigReader(config_path=f.name, use_env_vars=use_env_vars) + env_config = EnvConfigReader(config_path=f.name) assert env_config.get_from_env_or_akv(TEST_CONFIG_KEY) == expected_value