From 4e80892bbc87ac4f38c89d5d0ec28df98a09fb54 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Tue, 30 May 2023 18:10:08 +0200 Subject: [PATCH 1/8] Enable variable interpolation in catalog by escaping _ Signed-off-by: Merel Theisen --- kedro/config/omegaconf_config.py | 16 ++++++++++++++-- tests/config/test_omegaconf_config.py | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index ac4e2fc56d..49a176ea7e 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -286,7 +286,15 @@ def load_and_merge_dir_config( # pylint: disable=too-many-arguments return OmegaConf.to_container( OmegaConf.merge(*aggregate_config, self.runtime_params), resolve=True ) - return OmegaConf.to_container(OmegaConf.merge(*aggregate_config), resolve=True) + # return OmegaConf.to_container(OmegaConf.merge(*aggregate_config), resolve=True) + + return { + k: v + for k, v in OmegaConf.to_container( + OmegaConf.merge(*aggregate_config), resolve=True + ).items() + if not k.startswith("_") + } def _is_valid_config_path(self, path): """Check if given path is a file path and file type is yaml or json.""" @@ -307,7 +315,11 @@ def _check_duplicates(seen_files_to_keys: dict[Path, set[Any]]): for filepath2 in filepaths[i:]: config2 = seen_files_to_keys[filepath2] - overlapping_keys = config1 & config2 + combined_keys = config1 & config2 + overlapping_keys = set() + for key in combined_keys: + if not key.startswith("_"): + overlapping_keys.add(key) if overlapping_keys: sorted_keys = ", ".join(sorted(overlapping_keys)) diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index a0b152039a..23802d6e4c 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -596,3 +596,19 @@ def test_runtime_params_not_propogate_non_parameters_config(self, tmp_path): assert key not in credentials assert key not in 
logging assert key not in spark + + def test_ignore_hidden_keys(self, tmp_path): + """Check that the config keys starting with `_` are ignored and also + don't cause a config merge error""" + _write_yaml(tmp_path / _BASE_ENV / "catalog1.yml", {"k1": "v1", "_k2": "v2"}) + _write_yaml(tmp_path / _BASE_ENV / "catalog2.yml", {"k3": "v3", "_k2": "v4"}) + + conf = OmegaConfigLoader(str(tmp_path)) + conf.default_run_env = "" + catalog = conf["catalog"] + assert catalog.keys() == {"k1", "k3"} + + _write_yaml(tmp_path / _BASE_ENV / "catalog3.yml", {"k1": "dup", "_k2": "v5"}) + pattern = r"Duplicate keys found in .*catalog1\.yml and .*catalog3\.yml\: k1" + with pytest.raises(ValueError, match=pattern): + conf["catalog"] From b43d30962598a1b6b40b48dc3a62298f175b0635 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Tue, 30 May 2023 18:28:33 +0200 Subject: [PATCH 2/8] Fix test Signed-off-by: Merel Theisen --- tests/config/test_omegaconf_config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index 23802d6e4c..76b485f376 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -609,6 +609,10 @@ def test_ignore_hidden_keys(self, tmp_path): assert catalog.keys() == {"k1", "k3"} _write_yaml(tmp_path / _BASE_ENV / "catalog3.yml", {"k1": "dup", "_k2": "v5"}) - pattern = r"Duplicate keys found in .*catalog1\.yml and .*catalog3\.yml\: k1" + pattern = ( + r"Duplicate keys found in " + r"(.*catalog1\.yml and .*catalog3\.yml|.*catalog3\.yml and .*catalog1\.yml)" + r"\: k1" + ) with pytest.raises(ValueError, match=pattern): conf["catalog"] From f0504a11013132ccbb03297369876aec9c62a352 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Tue, 30 May 2023 20:58:36 +0200 Subject: [PATCH 3/8] Update docs Signed-off-by: Merel Theisen --- .../configuration/advanced_configuration.md | 21 +++++++++++++++++-- .../configuration/configuration_basics.md | 3 ++- 
2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/source/configuration/advanced_configuration.md b/docs/source/configuration/advanced_configuration.md index 4e5a8b9b3a..8bd4c2fb62 100644 --- a/docs/source/configuration/advanced_configuration.md +++ b/docs/source/configuration/advanced_configuration.md @@ -236,10 +236,27 @@ data: Since both of the file names (`parameters.yml` and `parameters_globals.yml`) match the config pattern for parameters, the `OmegaConfigLoader` will load the files and resolve the placeholders correctly. -```{note} -Templating currently only works for parameter files, but not for catalog files. +From Kedro `0.18.10` templating also works for catalog files. To enable templating in the catalog you need to ensure that the template values are within the catalog files or the name of the file that contains the template values follows the same config pattern specified for catalogs. +By default, the config pattern for catalogs is: `["catalog*", "catalog*/**", "**/catalog*"]`. + +Additionally, any template values in the catalog need to start with an underscore `_`. This is because of how catalog entries are validated. + +Suppose you have one catalog file called `catalog.yml` containing entries with `omegaconf` placeholders like this: + +```yaml +companies: + type: ${_pandas.type} + filepath: data/01_raw/companies.csv ``` +and a file containing the template values called `catalog_globals.yml`: +```yaml +_pandas: + type: pandas.CSVDataSet +``` + +Since both of the file names (`catalog.yml` and `catalog_globals.yml`) match the config pattern for catalogs, the `OmegaConfigLoader` will load the files and resolve the placeholders correctly. + ### How to use custom resolvers in the `OmegaConfigLoader` `Omegaconf` provides functionality to [register custom resolvers](https://omegaconf.readthedocs.io/en/2.3_branch/usage.html#resolvers) for templated values. 
You can use these custom resolves within Kedro by extending the [`OmegaConfigLoader`](/kedro.config.OmegaConfigLoader) class. The example below illustrates this: diff --git a/docs/source/configuration/configuration_basics.md b/docs/source/configuration/configuration_basics.md index 54a1396ddf..197d8b2478 100644 --- a/docs/source/configuration/configuration_basics.md +++ b/docs/source/configuration/configuration_basics.md @@ -45,7 +45,8 @@ Kedro merges configuration information and returns a configuration dictionary ac * If any two configuration files located inside the **same** environment path (such as `conf/base/`) contain the same top-level key, the configuration loader raises a `ValueError` indicating that duplicates are not allowed. * If two configuration files contain the same top-level key but are in **different** environment paths (for example, one in `conf/base/`, another in `conf/local/`) then the last loaded path (`conf/local/`) takes precedence as the key value. `ConfigLoader.get` does not raise any errors but a `DEBUG` level log message is emitted with information on the overridden keys. -When using the default `ConfigLoader` or the `TemplatedConfigLoader`, any top-level keys that start with `_` are considered hidden (or reserved) and are ignored. Those keys will neither trigger a key duplication error nor appear in the resulting configuration dictionary. However, you can still use such keys, for example, as [YAML anchors and aliases](https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors). +When using any of the configuration loaders, any top-level keys that start with `_` are considered hidden (or reserved) and are ignored. Those keys will neither trigger a key duplication error nor appear in the resulting configuration dictionary. 
However, you can still use such keys, for example, as [YAML anchors and aliases](https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors) +or [to enable templating in the catalog when using the `OmegaConfigLoader`](advanced_configuration.md#how-to-do-templating-with-the-omegaconfigloader). ### Configuration file names Configuration files will be matched according to file name and type rules. Suppose the config loader needs to fetch the catalog configuration, it will search according to the following rules: From bce3ceb43db347c66995e597000b4f8a119155e2 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Wed, 31 May 2023 11:35:33 +0200 Subject: [PATCH 4/8] Clean ups + add two more tests Signed-off-by: Merel Theisen --- kedro/config/omegaconf_config.py | 9 +++---- tests/config/test_omegaconf_config.py | 35 ++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index 49a176ea7e..75303e2902 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -286,8 +286,6 @@ def load_and_merge_dir_config( # pylint: disable=too-many-arguments return OmegaConf.to_container( OmegaConf.merge(*aggregate_config, self.runtime_params), resolve=True ) - # return OmegaConf.to_container(OmegaConf.merge(*aggregate_config), resolve=True) - return { k: v for k, v in OmegaConf.to_container( @@ -316,10 +314,9 @@ def _check_duplicates(seen_files_to_keys: dict[Path, set[Any]]): config2 = seen_files_to_keys[filepath2] combined_keys = config1 & config2 - overlapping_keys = set() - for key in combined_keys: - if not key.startswith("_"): - overlapping_keys.add(key) + overlapping_keys = { + key for key in combined_keys if not key.startswith("_") + } if overlapping_keys: sorted_keys = ", ".join(sorted(overlapping_keys)) diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index 76b485f376..13f621ced3 100644 --- 
a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -73,7 +73,6 @@ def create_config_dir(tmp_path, base_config, local_config): base_catalog = tmp_path / _BASE_ENV / "catalog.yml" base_logging = tmp_path / _BASE_ENV / "logging.yml" base_spark = tmp_path / _BASE_ENV / "spark.yml" - base_catalog = tmp_path / _BASE_ENV / "catalog.yml" local_catalog = tmp_path / _DEFAULT_RUN_ENV / "catalog.yml" @@ -616,3 +615,37 @@ def test_ignore_hidden_keys(self, tmp_path): ) with pytest.raises(ValueError, match=pattern): conf["catalog"] + + def test_variable_interpolation_in_catalog_with_templates(self, tmp_path): + base_catalog = tmp_path / _BASE_ENV / "catalog.yml" + catalog_config = { + "companies": { + "type": "${_pandas.type}", + "filepath": "data/01_raw/companies.csv", + }, + "_pandas": {"type": "pandas.CSVDataSet"}, + } + _write_yaml(base_catalog, catalog_config) + + conf = OmegaConfigLoader(str(tmp_path)) + conf.default_run_env = "" + assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataSet" + + def test_variable_interpolation_in_catalog_with_separate_templates_file( + self, tmp_path + ): + base_catalog = tmp_path / _BASE_ENV / "catalog.yml" + catalog_config = { + "companies": { + "type": "${_pandas.type}", + "filepath": "data/01_raw/companies.csv", + } + } + temp_catalog = tmp_path / _BASE_ENV / "catalog_temp.yml" + template = {"_pandas": {"type": "pandas.CSVDataSet"}} + _write_yaml(base_catalog, catalog_config) + _write_yaml(temp_catalog, template) + + conf = OmegaConfigLoader(str(tmp_path)) + conf.default_run_env = "" + assert conf["catalog"]["companies"]["type"] == "pandas.CSVDataSet" From 11e5476d8054a39884b4dd1c305d228dba840204 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Thu, 1 Jun 2023 14:17:40 +0200 Subject: [PATCH 5/8] Address review comments Signed-off-by: Merel Theisen --- docs/source/configuration/advanced_configuration.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/docs/source/configuration/advanced_configuration.md b/docs/source/configuration/advanced_configuration.md index 8bd4c2fb62..e630002669 100644 --- a/docs/source/configuration/advanced_configuration.md +++ b/docs/source/configuration/advanced_configuration.md @@ -218,6 +218,7 @@ Although Jinja2 is a very powerful and extremely flexible template engine, which ### How to do templating with the `OmegaConfigLoader` +#### Parameters Templating or [variable interpolation](https://omegaconf.readthedocs.io/en/2.3_branch/usage.html#variable-interpolation), as it's called in `OmegaConf`, for parameters works out of the box if the template values are within the parameter files or the name of the file that contains the template values follows the same config pattern specified for parameters. By default, the config pattern for parameters is: `["parameters*", "parameters*/**", "**/parameters*"]`. Suppose you have one parameters file called `parameters.yml` containing parameters with `omegaconf` placeholders like this: @@ -236,10 +237,11 @@ data: Since both of the file names (`parameters.yml` and `parameters_globals.yml`) match the config pattern for parameters, the `OmegaConfigLoader` will load the files and resolve the placeholders correctly. +#### Catalog From Kedro `0.18.10` templating also works for catalog files. To enable templating in the catalog you need to ensure that the template values are within the catalog files or the name of the file that contains the template values follows the same config pattern specified for catalogs. By default, the config pattern for catalogs is: `["catalog*", "catalog*/**", "**/catalog*"]`. -Additionally, any template values in the catalog need to start with an underscore `_`. This is because of how catalog entries are validated. +Additionally, any template values in the catalog need to start with an underscore `_`. This is because of how catalog entries are validated. 
Templated values will neither trigger a key duplication error nor appear in the resulting configuration dictionary. Suppose you have one catalog file called `catalog.yml` containing entries with `omegaconf` placeholders like this: From 78ffc61a1180cbc9a368f4a3f1e989565bb560e4 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Thu, 1 Jun 2023 14:28:07 +0200 Subject: [PATCH 6/8] Update release notes Signed-off-by: Merel Theisen --- RELEASE.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 09416a5b36..f6bfa3b60c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,17 @@ ## Migration guide from Kedro 0.18.* to 0.19.* +# Upcoming Release 0.18.10 + +## Major features and improvements +* Added support for variable interpolation in the catalog with the `OmegaConfigLoader`. + +## Bug fixes and other changes + +## Breaking changes to the API + +## Upcoming deprecations for Kedro 0.19.0 + # Release 0.18.9 ## Major features and improvements @@ -35,7 +46,6 @@ Many thanks to the following Kedroids for contributing PRs to this release: ## Upcoming deprecations for Kedro 0.19.0 - # Release 0.18.8 ## Major features and improvements From 52650d8b8d89c20068593a4cad62c77e5c52efa7 Mon Sep 17 00:00:00 2001 From: Merel Theisen <49397448+merelcht@users.noreply.github.com> Date: Fri, 2 Jun 2023 10:52:46 +0100 Subject: [PATCH 7/8] Update tests/config/test_omegaconf_config.py Co-authored-by: Nok Lam Chan --- tests/config/test_omegaconf_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index 13f621ced3..bc55715075 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -641,7 +641,7 @@ def test_variable_interpolation_in_catalog_with_separate_templates_file( "filepath": "data/01_raw/companies.csv", } } - temp_catalog = tmp_path / _BASE_ENV / "catalog_temp.yml" + tmp_catalog= tmp_path / 
_BASE_ENV / "catalog_temp.yml" template = {"_pandas": {"type": "pandas.CSVDataSet"}} _write_yaml(base_catalog, catalog_config) _write_yaml(temp_catalog, template) From 18dcb7f8890b13c9c825821abe01ba8de57ad659 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Fri, 2 Jun 2023 12:59:47 +0200 Subject: [PATCH 8/8] Address review comments + lint Signed-off-by: Merel Theisen --- docs/source/configuration/advanced_configuration.md | 3 +++ tests/config/test_omegaconf_config.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/configuration/advanced_configuration.md b/docs/source/configuration/advanced_configuration.md index e630002669..efd71a8564 100644 --- a/docs/source/configuration/advanced_configuration.md +++ b/docs/source/configuration/advanced_configuration.md @@ -259,6 +259,9 @@ _pandas: Since both of the file names (`catalog.yml` and `catalog_globals.yml`) match the config pattern for catalogs, the `OmegaConfigLoader` will load the files and resolve the placeholders correctly. +#### Other configuration files +It's also possible to use variable interpolation in configuration files other than parameters and catalog, such as custom Spark or MLflow configuration. This works in the same way as variable interpolation in parameter files. You can still use the underscore for the templated values if you want, but it's not mandatory like it is for catalog files. + ### How to use custom resolvers in the `OmegaConfigLoader` `Omegaconf` provides functionality to [register custom resolvers](https://omegaconf.readthedocs.io/en/2.3_branch/usage.html#resolvers) for templated values. You can use these custom resolves within Kedro by extending the [`OmegaConfigLoader`](/kedro.config.OmegaConfigLoader) class. 
The example below illustrates this: diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index bc55715075..dd49292019 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -641,10 +641,10 @@ def test_variable_interpolation_in_catalog_with_separate_templates_file( "filepath": "data/01_raw/companies.csv", } } - tmp_catalog= tmp_path / _BASE_ENV / "catalog_temp.yml" + tmp_catalog = tmp_path / _BASE_ENV / "catalog_temp.yml" template = {"_pandas": {"type": "pandas.CSVDataSet"}} _write_yaml(base_catalog, catalog_config) - _write_yaml(temp_catalog, template) + _write_yaml(tmp_catalog, template) conf = OmegaConfigLoader(str(tmp_path)) conf.default_run_env = ""