From 00f7e5724fe1f02e15873ef894657a079471435a Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Sat, 12 Feb 2022 18:17:08 +0000 Subject: [PATCH 01/11] Filter out default configs when overrides exist. When sending configs to Airflow workers we materialize a temp config file. In #18772 a feature was added so that `_cmd` generated secrets are not written to the files in some cases instead favoring maintaining the raw `_cmd` settings. Unfortunately during materializing of the configs via `as_dict()` Airflow defaults are generated and materialized as well including defaults for the non `_cmd` versions of some settings. And due to Airflow setting precedence stating bare versions of settings winning over `_cmd` versions it results in `_cmd` settings being discarded: https://airflow.apache.org/docs/apache-airflow/stable/howto/set-config.html This change checks `_cmd`, env, and secrets when materializing configs via `as_dict()` so that if the bare versions of the values is exactly the same as Airflow defaults and we have "hidden" / special versions of these configs that are trying to be set we remove the bare versions so that the correct version can be used. Fixes: #20092 Related to: #18772 #4050 --- airflow/configuration.py | 21 +++++++++++++++++++++ tests/core/test_configuration.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/airflow/configuration.py b/airflow/configuration.py index e36917a08a42e..e66c40a0fec4d 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -644,14 +644,21 @@ def as_dict( # add env vars and overwrite because they have priority if include_env: self._include_envs(config_sources, display_sensitive, display_source, raw) + else: + self._filter_by_source(config_sources, display_source, self._get_env_var_option) # add bash commands if include_cmds: self._include_commands(config_sources, display_sensitive, display_source, raw) + else: + self._filter_by_source(config_sources, display_source, self._get_cmd_option) # add config from secret backends if include_secret: self._include_secrets(config_sources, display_sensitive, display_source, raw) + else: + self._filter_by_source(config_sources, display_source, self._get_env_var_option) + return config_sources def _include_secrets(self, config_sources, display_sensitive, display_source, raw): @@ -709,6 +716,20 @@ def _include_envs(self, config_sources, display_sensitive, display_source, raw): key = key.lower() config_sources.setdefault(section, OrderedDict()).update({key: opt}) + def _filter_by_source(self, config_sources, display_source, getter_func): + for (section, key) in self.sensitive_config_values: + opt = getter_func(section, key) + if not opt: + continue + if section not in config_sources or key not in config_sources[section]: + continue + if display_source: + opt, source = config_sources[section][key] + else: + opt = config_sources[section][key] + if opt == self.airflow_defaults.get(section, key): + del config_sources[section][key] + @staticmethod def _replace_config_with_display_sources(config_sources, configs, display_source, raw): for (source_name, config) in configs: diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index d35267db6c0f1..4d0aa03b0db5f 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -769,3 +769,31 @@ def test_enum_logging_levels(self): "CRITICAL, FATAL, ERROR, WARN, WARNING, INFO, DEBUG." ) assert message == exception + + def test_as_dict_works_without_sensitive_cmds(self): + test_conf = conf + + conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) + conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) + + assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'sqlite:////root/airflow/airflow.db' + assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] + + assert conf_maintain_cmds['core']['sql_alchemy_conn'] == 'sqlite:////root/airflow/airflow.db' + assert 'sql_alchemy_conn_cmd' not in conf_maintain_cmds['core'] + + def test_as_dict_respects_sensitive_cmds(self): + test_conf = conf + test_conf.read_string(textwrap.dedent(""" + [core] + sql_alchemy_conn_cmd = echo -n my-super-secret-conn + """)) + + conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) + conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) + + assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn' + assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] + + assert 'sql_alchemy_conn' not in conf_maintain_cmds['core'] + assert conf_maintain_cmds['core']['sql_alchemy_conn_cmd'] == 'echo -n my-super-secret-conn' From 5de0978d0644f3362cea8d6fb382ceb839fdd695 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Mon, 14 Feb 2022 20:38:13 +0000 Subject: [PATCH 02/11] Black Reformat --- tests/core/test_configuration.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 4d0aa03b0db5f..8fe9e4f91b77c 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -784,10 +784,14 @@ def test_as_dict_works_without_sensitive_cmds(self): def test_as_dict_respects_sensitive_cmds(self): test_conf = conf - test_conf.read_string(textwrap.dedent(""" - [core] - sql_alchemy_conn_cmd = echo -n my-super-secret-conn - """)) + test_conf.read_string( + textwrap.dedent( + """ + [core] + sql_alchemy_conn_cmd = echo -n my-super-secret-conn + """ + ) + ) conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) From 31703962e64b5a7058de96fbfb82e707e3cb1d5e Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Mon, 14 Feb 2022 23:29:19 +0000 Subject: [PATCH 03/11] Update tests --- tests/core/test_configuration.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 8fe9e4f91b77c..92324142234e3 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -776,13 +776,16 @@ def test_as_dict_works_without_sensitive_cmds(self): conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) - assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'sqlite:////root/airflow/airflow.db' + assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] - assert conf_maintain_cmds['core']['sql_alchemy_conn'] == 'sqlite:////root/airflow/airflow.db' + assert 'sql_alchemy_conn' in conf_maintain_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_maintain_cmds['core'] + assert conf_materialize_cmds['core']['sql_alchemy_conn'] == conf_maintain_cmds['core']['sql_alchemy_conn'] + def test_as_dict_respects_sensitive_cmds(self): + conf_conn = conf['core']['sql_alchemy_conn'] test_conf = conf test_conf.read_string( textwrap.dedent( @@ -796,8 +799,15 @@ def test_as_dict_respects_sensitive_cmds(self): conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) - assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn' + assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] - assert 'sql_alchemy_conn' not in conf_maintain_cmds['core'] + if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']: + assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn' + + assert 'sql_alchemy_conn_cmd' in conf_maintain_cmds['core'] assert conf_maintain_cmds['core']['sql_alchemy_conn_cmd'] == 'echo -n my-super-secret-conn' + + if conf_conn != test_conf.airflow_defaults['core']['sql_alchemy_conn']: + assert 'sql_alchemy_conn' in conf_maintain_cmds['core'] + assert conf_maintain_cmds['core']['sql_alchemy_conn'] == conf_conn From 5f4f40bf95eaa72b885c234ead9aaaf764921dd4 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Mon, 14 Feb 2022 23:40:36 +0000 Subject: [PATCH 04/11] More tests --- tests/core/test_configuration.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 92324142234e3..0d9681ac17ec0 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -802,7 +802,9 @@ def test_as_dict_respects_sensitive_cmds(self): assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] - if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']: + if conf_conn != test_conf.airflow_defaults['core']['sql_alchemy_conn']: + assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] + else: assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn' assert 'sql_alchemy_conn_cmd' in conf_maintain_cmds['core'] From 00641580fdc97c4329bfd4db763450471f93f896 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Mon, 14 Feb 2022 23:49:59 +0000 Subject: [PATCH 05/11] Less double neg in test assertions --- tests/core/test_configuration.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 0d9681ac17ec0..9974240f7a21f 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -802,14 +802,14 @@ def test_as_dict_respects_sensitive_cmds(self): assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] - if conf_conn != test_conf.airflow_defaults['core']['sql_alchemy_conn']: - assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] - else: + if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']: assert conf_materialize_cmds['core']['sql_alchemy_conn'] == 'my-super-secret-conn' assert 'sql_alchemy_conn_cmd' in conf_maintain_cmds['core'] assert conf_maintain_cmds['core']['sql_alchemy_conn_cmd'] == 'echo -n my-super-secret-conn' - if conf_conn != test_conf.airflow_defaults['core']['sql_alchemy_conn']: + if conf_conn == test_conf.airflow_defaults['core']['sql_alchemy_conn']: + assert 'sql_alchemy_conn' not in conf_maintain_cmds['core'] + else: assert 'sql_alchemy_conn' in conf_maintain_cmds['core'] assert conf_maintain_cmds['core']['sql_alchemy_conn'] == conf_conn From 905fefd54eae36a21a35ad4452219f309cc15b43 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Tue, 15 Feb 2022 01:26:17 +0000 Subject: [PATCH 06/11] More Black --- tests/core/test_configuration.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 9974240f7a21f..57eb1584952ad 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -782,7 +782,10 @@ def test_as_dict_works_without_sensitive_cmds(self): assert 'sql_alchemy_conn' in conf_maintain_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_maintain_cmds['core'] - assert conf_materialize_cmds['core']['sql_alchemy_conn'] == conf_maintain_cmds['core']['sql_alchemy_conn'] + assert ( + conf_materialize_cmds['core']['sql_alchemy_conn'] + == conf_maintain_cmds['core']['sql_alchemy_conn'] + ) def test_as_dict_respects_sensitive_cmds(self): conf_conn = conf['core']['sql_alchemy_conn'] From 027a758565b242abbd645c38e45d25a65543eae2 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Thu, 17 Feb 2022 20:13:18 +0000 Subject: [PATCH 07/11] Don't mess with global state --- tests/core/test_configuration.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index 57eb1584952ad..ab32e5e570ec7 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -15,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import copy import io import os import re @@ -771,10 +772,8 @@ def test_enum_logging_levels(self): assert message == exception def test_as_dict_works_without_sensitive_cmds(self): - test_conf = conf - - conf_materialize_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) - conf_maintain_cmds = test_conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) + conf_materialize_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=True) + conf_maintain_cmds = conf.as_dict(display_sensitive=True, raw=True, include_cmds=False) assert 'sql_alchemy_conn' in conf_materialize_cmds['core'] assert 'sql_alchemy_conn_cmd' not in conf_materialize_cmds['core'] @@ -789,7 +788,7 @@ def test_as_dict_works_without_sensitive_cmds(self): def test_as_dict_respects_sensitive_cmds(self): conf_conn = conf['core']['sql_alchemy_conn'] - test_conf = conf + test_conf = copy.copy(conf) test_conf.read_string( textwrap.dedent( """ From d85ac8e9c446e6da1b2149b168aa3a75c55a63db Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Thu, 17 Feb 2022 21:15:41 +0000 Subject: [PATCH 08/11] Wrap getter in a try/catch --- airflow/configuration.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/airflow/configuration.py b/airflow/configuration.py index e66c40a0fec4d..861b60799c9ed 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -657,7 +657,7 @@ def as_dict( if include_secret: self._include_secrets(config_sources, display_sensitive, display_source, raw) else: - self._filter_by_source(config_sources, display_source, self._get_env_var_option) + self._filter_by_source(config_sources, display_source, self._get_secret_option) return config_sources @@ -718,11 +718,16 @@ def _include_envs(self, config_sources, display_sensitive, display_source, raw): def _filter_by_source(self, config_sources, display_source, getter_func): for (section, key) in self.sensitive_config_values: - opt = getter_func(section, key) - if not opt: - continue + # Don't bother if we don't have section / key if section not in config_sources or key not in config_sources[section]: continue + # Check that there is something to override defaults + try: + opt = getter_func(section, key) + except ValueError: + continue + if not opt: + continue if display_source: opt, source = config_sources[section][key] else: From bf8bf03980d994c2d1b276bd66e02127dfdb9b44 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Thu, 17 Feb 2022 23:03:26 +0000 Subject: [PATCH 09/11] Catch errors, avoid leakage --- airflow/configuration.py | 8 ++++++-- tests/core/test_configuration.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/airflow/configuration.py b/airflow/configuration.py index 861b60799c9ed..e3ffc406022a3 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -723,11 +723,15 @@ def _filter_by_source(self, config_sources, display_source, getter_func): continue # Check that there is something to override defaults try: - opt = getter_func(section, key) + getter_opt = getter_func(section, key) except ValueError: continue - if not opt: + if not getter_opt: + continue + # Check to see that there is a default value + if not self.airflow_defaults.has_option(section, key): continue + # Check to see if bare setting is the same as defaults if display_source: opt, source = config_sources[section][key] else: diff --git a/tests/core/test_configuration.py b/tests/core/test_configuration.py index ab32e5e570ec7..e10004b408395 100644 --- a/tests/core/test_configuration.py +++ b/tests/core/test_configuration.py @@ -788,7 +788,7 @@ def test_as_dict_works_without_sensitive_cmds(self): def test_as_dict_respects_sensitive_cmds(self): conf_conn = conf['core']['sql_alchemy_conn'] - test_conf = copy.copy(conf) + test_conf = copy.deepcopy(conf) test_conf.read_string( textwrap.dedent( """ From 2fa8198e3dd5c78f9abf27aebf188d98a78cea87 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Thu, 3 Mar 2022 18:30:43 +0000 Subject: [PATCH 10/11] Add docstrings to explain config filtering, props @potiuk, @uranusjr --- airflow/configuration.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/airflow/configuration.py b/airflow/configuration.py index 9e5a686f6c7d9..340d2d4f6fd74 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -680,6 +680,15 @@ def as_dict( """ Returns the current configuration as an OrderedDict of OrderedDicts. + When materializing current configuration Airflow defaults are + materialized along with user set configs. If any of the `include_*` + options are False then the result of calling command or secret key + configs do not override Airflow defaults and instead are passed through. + In order to then avoid Airflow defaults from overwriting user set + command or secret key configs we filter out bare sensitive_config_values + that are set to Airflow defaults when command or secret key configs + produce different values. + :param display_source: If False, the option value is returned. If True, a tuple of (option_value, source) is returned. Source is either 'airflow.cfg', 'default', 'env var', or 'cmd'. @@ -784,6 +793,25 @@ def _include_envs(self, config_sources, display_sensitive, display_source, raw): config_sources.setdefault(section, OrderedDict()).update({key: opt}) def _filter_by_source(self, config_sources, display_source, getter_func): + """ + Deletes default configs from current configuration (an OrderedDict of + OrderedDicts) if it would conflict with special sensitive_config_values. + + This is necessary because bare configs take precedence over the command + or secret key equivalents so if the current running config is + materialized with Airflow defaults they in turn override user set + command or secret key configs. + + :param config_sources: The current configuration to operate on + :param display_source: If False, configuration options contain raw + values. If True, options are a tuple of (option_value, source). + Source is either 'airflow.cfg', 'default', 'env var', or 'cmd'. + :param getter_func: A callback function that gets the user configured + override value for a particular sensitive_config_values config. + :rtype: None + :return: None, the given config_sources is filtered if necessary, + otherwise untouched. + """ for (section, key) in self.sensitive_config_values: # Don't bother if we don't have section / key if section not in config_sources or key not in config_sources[section]: From 46ece2dba7b43ff5f8adeb71a4e950dea5b1be6f Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Mon, 14 Mar 2022 21:35:46 +0000 Subject: [PATCH 11/11] Add note about differing config precedence in some versions --- docs/apache-airflow/howto/set-config.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/apache-airflow/howto/set-config.rst b/docs/apache-airflow/howto/set-config.rst index b0702af051cd1..a31885b2da1b5 100644 --- a/docs/apache-airflow/howto/set-config.rst +++ b/docs/apache-airflow/howto/set-config.rst @@ -100,6 +100,10 @@ The universal order of precedence for all configuration options is as follows: #. secret key in ``airflow.cfg`` #. Airflow's built in defaults +.. note:: + For Airflow versions >= 2.2.1, < 2.3.0 Airflow's built in defaults took precedence + over command and secret key in ``airflow.cfg`` in some circumstances. + You can check the current configuration with the ``airflow config list`` command. If you only want to see the value for one option, you can use ``airflow config get-value`` command as in