diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index 9c9f7c2153c2..068b19df079e 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -1073,20 +1073,9 @@ metrics: description: | `StatsD `__ integration settings. options: - metrics_use_pattern_match: - description: | - If true, ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` will use - regex pattern matching anywhere within the metric name instead of only prefix matching - at the start of the name. - version_added: 2.9.0 - type: boolean - example: ~ - default: "False" metrics_allow_list: description: | - Configure an allow list (comma separated string) to send only certain metrics. - If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. - If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. + Configure an allow list (comma separated regex patterns to match) to send only certain metrics. version_added: 2.6.0 type: string example: "\"scheduler,executor,dagrun,pool,triggerer,celery\" @@ -1094,13 +1083,11 @@ metrics: default: "" metrics_block_list: description: | - Configure a block list (comma separated string) to block certain metrics from being emitted. + Configure a block list (comma separated regex patterns to match) to block certain metrics + from being emitted. If ``[metrics] metrics_allow_list`` and ``[metrics] metrics_block_list`` are both configured, ``[metrics] metrics_block_list`` is ignored. - If ``[metrics] metrics_use_pattern_match`` is ``false``, match only the exact metric name prefix. - - If ``[metrics] metrics_use_pattern_match`` is ``true``, provide regex patterns to match. version_added: 2.6.0 type: string example: "\"scheduler,executor,dagrun,pool,triggerer,celery\" diff --git a/airflow/metrics/datadog_logger.py b/airflow/metrics/datadog_logger.py index c7bcf1986d85..60b5424a6f5c 100644 --- a/airflow/metrics/datadog_logger.py +++ b/airflow/metrics/datadog_logger.py @@ -26,8 +26,8 @@ from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.metrics.protocols import Timer from airflow.metrics.validators import ( - AllowListValidator, - BlockListValidator, + PatternAllowListValidator, + PatternBlockListValidator, get_validator, validate_stat, ) @@ -57,9 +57,9 @@ class SafeDogStatsdLogger: def __init__( self, dogstatsd_client: DogStatsd, - metrics_validator: ListValidator = AllowListValidator(), + metrics_validator: ListValidator = PatternAllowListValidator(), metrics_tags: bool = False, - metric_tags_validator: ListValidator = AllowListValidator(), + metric_tags_validator: ListValidator = PatternAllowListValidator(), ) -> None: self.dogstatsd = dogstatsd_client self.metrics_validator = metrics_validator @@ -181,5 +181,7 @@ def get_dogstatsd_logger(cls) -> SafeDogStatsdLogger: constant_tags=cls.get_constant_tags(), ) datadog_metrics_tags = conf.getboolean("metrics", "statsd_datadog_metrics_tags", fallback=True) - metric_tags_validator = BlockListValidator(conf.get("metrics", "statsd_disabled_tags", fallback=None)) + metric_tags_validator = PatternBlockListValidator( + conf.get("metrics", "statsd_disabled_tags", fallback=None) + ) return SafeDogStatsdLogger(dogstatsd, get_validator(), datadog_metrics_tags, metric_tags_validator) diff --git a/airflow/metrics/otel_logger.py b/airflow/metrics/otel_logger.py index e8d0f54d7328..594046703eb1 100644 --- a/airflow/metrics/otel_logger.py +++ b/airflow/metrics/otel_logger.py @@ -35,8 +35,8 @@ from airflow.metrics.protocols import Timer from airflow.metrics.validators import ( OTEL_NAME_MAX_LENGTH, - AllowListValidator, ListValidator, + PatternAllowListValidator, get_validator, stat_name_otel_handler, ) @@ -179,7 +179,7 @@ def __init__( self, otel_provider, prefix: str = DEFAULT_METRIC_NAME_PREFIX, - metrics_validator: ListValidator = AllowListValidator(), + metrics_validator: ListValidator = PatternAllowListValidator(), ): self.otel: Callable = otel_provider self.prefix: str = prefix diff --git a/airflow/metrics/statsd_logger.py b/airflow/metrics/statsd_logger.py index ecd6cfc231ad..cb3266e6105f 100644 --- a/airflow/metrics/statsd_logger.py +++ b/airflow/metrics/statsd_logger.py @@ -25,8 +25,8 @@ from airflow.exceptions import AirflowConfigException from airflow.metrics.protocols import Timer from airflow.metrics.validators import ( - AllowListValidator, - BlockListValidator, + PatternAllowListValidator, + PatternBlockListValidator, get_validator, validate_stat, ) @@ -70,9 +70,9 @@ class SafeStatsdLogger: def __init__( self, statsd_client: StatsClient, - metrics_validator: ListValidator = AllowListValidator(), + metrics_validator: ListValidator = PatternAllowListValidator(), influxdb_tags_enabled: bool = False, - metric_tags_validator: ListValidator = AllowListValidator(), + metric_tags_validator: ListValidator = PatternAllowListValidator(), ) -> None: self.statsd = statsd_client self.metrics_validator = metrics_validator @@ -180,5 +180,7 @@ def get_statsd_logger(cls) -> SafeStatsdLogger: ) influxdb_tags_enabled = conf.getboolean("metrics", "statsd_influxdb_enabled", fallback=False) - metric_tags_validator = BlockListValidator(conf.get("metrics", "statsd_disabled_tags", fallback=None)) + metric_tags_validator = PatternBlockListValidator( + conf.get("metrics", "statsd_disabled_tags", fallback=None) + ) return SafeStatsdLogger(statsd, get_validator(), influxdb_tags_enabled, metric_tags_validator) diff --git a/airflow/metrics/validators.py b/airflow/metrics/validators.py index 111ad9b87df6..b47cdac1be55 100644 --- a/airflow/metrics/validators.py +++ b/airflow/metrics/validators.py @@ -29,7 +29,7 @@ import re2 from airflow.configuration import conf -from airflow.exceptions import InvalidStatsNameException, RemovedInAirflow3Warning +from airflow.exceptions import InvalidStatsNameException log = logging.getLogger(__name__) @@ -88,25 +88,14 @@ class MetricNameLengthExemptionWarning(Warning): def get_validator() -> ListValidator: validators = { - "basic": {"allow": AllowListValidator, "block": BlockListValidator}, - "pattern": {"allow": PatternAllowListValidator, "block": PatternBlockListValidator}, + "allow": PatternAllowListValidator, + "block": PatternBlockListValidator, } metric_lists = { "allow": (metric_allow_list := conf.get("metrics", "metrics_allow_list", fallback=None)), "block": (metric_block_list := conf.get("metrics", "metrics_block_list", fallback=None)), } - use_pattern = conf.getboolean("metrics", "metrics_use_pattern_match", fallback=False) - validator_type = "pattern" if use_pattern else "basic" - - if not use_pattern: - warnings.warn( - "The basic metric validator will be deprecated in the future in favor of pattern-matching. " - "You can try this now by setting config option metrics_use_pattern_match to True.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - if metric_allow_list: list_type = "allow" if metric_block_list: @@ -118,7 +107,7 @@ def get_validator() -> ListValidator: else: list_type = DEFAULT_VALIDATOR_TYPE - return validators[validator_type][list_type](metric_lists[list_type]) + return validators[list_type](metric_lists[list_type]) def validate_stat(fn: Callable) -> Callable: @@ -263,16 +252,6 @@ def _has_pattern_match(self, name: str) -> bool: return False -class AllowListValidator(ListValidator): - """AllowListValidator only allows names that match the allowed prefixes.""" - - def test(self, name: str) -> bool: - if self.validate_list is not None: - return name.strip().lower().startswith(self.validate_list) - else: - return True # default is all metrics are allowed - - class PatternAllowListValidator(ListValidator): """Match the provided strings anywhere in the metric name.""" @@ -283,16 +262,6 @@ def test(self, name: str) -> bool: return True # default is all metrics are allowed -class BlockListValidator(ListValidator): - """BlockListValidator only allows names that do not match the blocked prefixes.""" - - def test(self, name: str) -> bool: - if self.validate_list is not None: - return not name.strip().lower().startswith(self.validate_list) - else: - return True # default is all metrics are allowed - - class PatternBlockListValidator(ListValidator): """Only allow names that do not match the blocked strings.""" diff --git a/newsfragments/41975.significant.rst b/newsfragments/41975.significant.rst new file mode 100644 index 000000000000..7931329dd6c2 --- /dev/null +++ b/newsfragments/41975.significant.rst @@ -0,0 +1 @@ +Metrics basic deprecated validators (``AllowListValidator`` and ``BlockListValidator``) were removed in favor of pattern matching. Pattern matching validators (``PatternAllowListValidator`` and ``PatternBlockListValidator``) are enabled by default. Configuration parameter ``metrics_use_pattern_match``was removed from the ``metrics`` section. diff --git a/tests/core/test_stats.py b/tests/core/test_stats.py index 5127b95927a8..ec7ff0cab08c 100644 --- a/tests/core/test_stats.py +++ b/tests/core/test_stats.py @@ -28,13 +28,11 @@ import statsd import airflow -from airflow.exceptions import AirflowConfigException, InvalidStatsNameException, RemovedInAirflow3Warning +from airflow.exceptions import AirflowConfigException, InvalidStatsNameException from airflow.metrics import datadog_logger, protocols from airflow.metrics.datadog_logger import SafeDogStatsdLogger from airflow.metrics.statsd_logger import SafeStatsdLogger from airflow.metrics.validators import ( - AllowListValidator, - BlockListValidator, PatternAllowListValidator, PatternBlockListValidator, ) @@ -101,7 +99,7 @@ def test_decr(self): def test_enabled_by_config(self): """Test that enabling this sets the right instance properties""" - with conf_vars({("metrics", "statsd_on"): "True", ("metrics", "metrics_use_pattern_match"): "True"}): + with conf_vars({("metrics", "statsd_on"): "True"}): importlib.reload(airflow.stats) assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) assert not hasattr(airflow.stats.Stats, "dogstatsd") @@ -113,7 +111,6 @@ def test_load_custom_statsd_client(self): { ("metrics", "statsd_on"): "True", ("metrics", "statsd_custom_client_path"): f"{__name__}.CustomStatsd", - ("metrics", "metrics_use_pattern_match"): "True", } ): importlib.reload(airflow.stats) @@ -142,11 +139,10 @@ def test_load_allow_list_validator(self): { ("metrics", "statsd_on"): "True", ("metrics", "metrics_allow_list"): "name1,name2", - ("metrics", "metrics_use_pattern_match"): "True", } ): importlib.reload(airflow.stats) - assert isinstance(airflow.stats.Stats.metrics_validator, AllowListValidator) + assert type(airflow.stats.Stats.metrics_validator) is PatternAllowListValidator assert airflow.stats.Stats.metrics_validator.validate_list == ("name1", "name2") # Avoid side-effects importlib.reload(airflow.stats) @@ -156,11 +152,10 @@ def test_load_block_list_validator(self): { ("metrics", "statsd_on"): "True", ("metrics", "metrics_block_list"): "name1,name2", - ("metrics", "metrics_use_pattern_match"): "True", } ): importlib.reload(airflow.stats) - assert isinstance(airflow.stats.Stats.metrics_validator, BlockListValidator) + assert type(airflow.stats.Stats.metrics_validator) is PatternBlockListValidator assert airflow.stats.Stats.metrics_validator.validate_list == ("name1", "name2") # Avoid side-effects importlib.reload(airflow.stats) @@ -171,11 +166,10 @@ def test_load_allow_and_block_list_validator_loads_only_allow_list_validator(sel ("metrics", "statsd_on"): "True", ("metrics", "metrics_allow_list"): "name1,name2", ("metrics", "metrics_block_list"): "name1,name2", - ("metrics", "metrics_use_pattern_match"): "True", } ): importlib.reload(airflow.stats) - assert isinstance(airflow.stats.Stats.metrics_validator, AllowListValidator) + assert type(airflow.stats.Stats.metrics_validator) is PatternAllowListValidator assert airflow.stats.Stats.metrics_validator.validate_list == ("name1", "name2") # Avoid side-effects importlib.reload(airflow.stats) @@ -279,9 +273,7 @@ def test_enabled_by_config(self): """Test that enabling this sets the right instance properties""" from datadog import DogStatsd - with conf_vars( - {("metrics", "statsd_datadog_enabled"): "True", ("metrics", "metrics_use_pattern_match"): "True"} - ): + with conf_vars({("metrics", "statsd_datadog_enabled"): "True"}): importlib.reload(airflow.stats) assert isinstance(airflow.stats.Stats.dogstatsd, DogStatsd) assert not hasattr(airflow.stats.Stats, "statsd") @@ -295,7 +287,6 @@ def test_does_not_send_stats_using_statsd_when_statsd_and_dogstatsd_both_on(self { ("metrics", "statsd_on"): "True", ("metrics", "statsd_datadog_enabled"): "True", - ("metrics", "metrics_use_pattern_match"): "True", } ): importlib.reload(airflow.stats) @@ -313,22 +304,12 @@ class TestStatsAllowAndBlockLists: (PatternAllowListValidator, "stats_three.foo", True), (PatternAllowListValidator, "stats_foo_three", True), (PatternAllowListValidator, "stats_three", False), - (AllowListValidator, "stats_one", True), - (AllowListValidator, "stats_two.bla", True), - (AllowListValidator, "stats_three.foo", False), - (AllowListValidator, "stats_foo_three", False), - (AllowListValidator, "stats_three", False), (PatternBlockListValidator, "stats_one", False), (PatternBlockListValidator, "stats_two.bla", False), (PatternBlockListValidator, "stats_three.foo", False), (PatternBlockListValidator, "stats_foo_three", False), (PatternBlockListValidator, "stats_foo", False), (PatternBlockListValidator, "stats_three", True), - (BlockListValidator, "stats_one", False), - (BlockListValidator, "stats_two.bla", False), - (BlockListValidator, "stats_three.foo", True), - (BlockListValidator, "stats_foo_three", True), - (BlockListValidator, "stats_three", True), ], ) def test_allow_and_block_list(self, validator, stat_name, expect_incr): @@ -367,14 +348,12 @@ def test_regex_matches(self, match_pattern, expect_incr): statsd_client.assert_not_called() -class TestPatternOrBasicValidatorConfigOption: +class TestPatternValidatorConfigOption: def teardown_method(self): # Avoid side-effects importlib.reload(airflow.stats) stats_on = {("metrics", "statsd_on"): "True"} - pattern_on = {("metrics", "metrics_use_pattern_match"): "True"} - pattern_off = {("metrics", "metrics_use_pattern_match"): "False"} allow_list = {("metrics", "metrics_allow_list"): "foo,bar"} block_list = {("metrics", "metrics_block_list"): "foo,bar"} @@ -382,61 +361,40 @@ def teardown_method(self): "config, expected", [ pytest.param( - {**stats_on, **pattern_on}, + {**stats_on}, PatternAllowListValidator, id="pattern_allow_by_default", ), pytest.param( - stats_on, - AllowListValidator, - id="basic_allow_by_default", - ), - pytest.param( - {**stats_on, **pattern_on, **allow_list}, + {**stats_on, **allow_list}, PatternAllowListValidator, id="pattern_allow_list_provided", ), pytest.param( - {**stats_on, **pattern_off, **allow_list}, - AllowListValidator, - id="basic_allow_list_provided", - ), - pytest.param( - {**stats_on, **pattern_on, **block_list}, + {**stats_on, **block_list}, PatternBlockListValidator, id="pattern_block_list_provided", ), pytest.param( - {**stats_on, **block_list}, - BlockListValidator, - id="basic_block_list_provided", + {**stats_on, **allow_list, **block_list}, + PatternAllowListValidator, + id="pattern_block_list_provided", ), ], ) - def test_pattern_or_basic_picker(self, config, expected): + def test_pattern_picker(self, config, expected): with conf_vars(config): importlib.reload(airflow.stats) - if eval(config.get(("metrics", "metrics_use_pattern_match"), "False")): - assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) - else: - with pytest.warns( - RemovedInAirflow3Warning, - match="The basic metric validator will be deprecated in the future in favor of pattern-matching. You can try this now by setting config option metrics_use_pattern_match to True.", - ): - assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) - assert isinstance(airflow.stats.Stats.instance.metrics_validator, expected) - - @conf_vars({**stats_on, **block_list, ("metrics", "metrics_allow_list"): "bax,qux"}) + assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) + assert type(airflow.stats.Stats.instance.metrics_validator) is expected + + @conf_vars({**stats_on, **block_list, ("metrics", "metrics_allow_list"): "baz,qux"}) def test_setting_allow_and_block_logs_warning(self, caplog): importlib.reload(airflow.stats) - with pytest.warns( - RemovedInAirflow3Warning, - match="The basic metric validator will be deprecated in the future in favor of pattern-matching. You can try this now by setting config option metrics_use_pattern_match to True.", - ): - assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) - assert isinstance(airflow.stats.Stats.instance.metrics_validator, AllowListValidator) + assert isinstance(airflow.stats.Stats.statsd, statsd.StatsClient) + assert type(airflow.stats.Stats.instance.metrics_validator) is PatternAllowListValidator with caplog.at_level(logging.WARNING): assert "Ignoring metrics_block_list" in caplog.text @@ -447,7 +405,9 @@ def setup_method(self): from datadog import DogStatsd self.dogstatsd_client = Mock(speck=DogStatsd) - self.dogstats = SafeDogStatsdLogger(self.dogstatsd_client, AllowListValidator("stats_one, stats_two")) + self.dogstats = SafeDogStatsdLogger( + self.dogstatsd_client, PatternAllowListValidator("stats_one, stats_two") + ) def test_increment_counter_with_allowed_key(self): self.dogstats.incr("stats_one") @@ -490,7 +450,7 @@ def setup_method(self): self.dogstatsd = SafeDogStatsdLogger( self.dogstatsd_client, metrics_tags=True, - metric_tags_validator=BlockListValidator("key1"), + metric_tags_validator=PatternBlockListValidator("key1"), ) def test_does_send_stats_using_dogstatsd_with_tags(self): @@ -512,7 +472,7 @@ def setup_method(self): self.stats = SafeStatsdLogger( self.statsd_client, influxdb_tags_enabled=True, - metric_tags_validator=BlockListValidator("key2,key3"), + metric_tags_validator=PatternBlockListValidator("key2,key3"), ) def test_increment_counter(self): @@ -548,7 +508,6 @@ class TestCustomStatsName: @conf_vars( { ("metrics", "statsd_on"): "True", - ("metrics", "metrics_use_pattern_match"): "True", ("metrics", "stat_name_handler"): "tests.core.test_stats.always_invalid", } ) @@ -561,7 +520,6 @@ def test_does_not_send_stats_using_statsd_when_the_name_is_not_valid(self, mock_ @conf_vars( { ("metrics", "statsd_datadog_enabled"): "True", - ("metrics", "metrics_use_pattern_match"): "True", ("metrics", "stat_name_handler"): "tests.core.test_stats.always_invalid", } ) @@ -575,7 +533,6 @@ def test_does_not_send_stats_using_dogstatsd_when_the_name_is_not_valid(self, mo { ("metrics", "statsd_on"): "True", ("metrics", "stat_name_handler"): "tests.core.test_stats.always_valid", - ("metrics", "metrics_use_pattern_match"): "True", } ) @mock.patch("statsd.StatsClient") @@ -587,7 +544,6 @@ def test_does_send_stats_using_statsd_when_the_name_is_valid(self, mock_statsd): @conf_vars( { ("metrics", "statsd_datadog_enabled"): "True", - ("metrics", "metrics_use_pattern_match"): "True", ("metrics", "stat_name_handler"): "tests.core.test_stats.always_valid", } )