From 4cad3c0f54c75f8c8a3d5ea8514b61cd4d7cd8a4 Mon Sep 17 00:00:00 2001 From: Shahar Epstein <60007259+shahar1@users.noreply.github.com> Date: Fri, 6 Dec 2024 22:05:06 +0200 Subject: [PATCH 01/25] Fix #43349 newsfragment (#44741) --- .pre-commit-config.yaml | 2 +- newsfragments/{43368.significant.rst => 43349.significant.rst} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename newsfragments/{43368.significant.rst => 43349.significant.rst} (100%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb0071a751fab..49e74d453d09d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -675,7 +675,7 @@ repos: ^contributing-docs/03_contributors_quick_start.rst$| ^.*\.(png|gif|jp[e]?g|tgz|lock)$| git| - ^newsfragments/43368\.significant\.rst$ + ^newsfragments/43349\.significant\.rst$ - id: check-base-operator-partial-arguments name: Check BaseOperator and partial() arguments language: python diff --git a/newsfragments/43368.significant.rst b/newsfragments/43349.significant.rst similarity index 100% rename from newsfragments/43368.significant.rst rename to newsfragments/43349.significant.rst From abb2fa7a02e18f0883b4c9ca73ffb14ce0af9333 Mon Sep 17 00:00:00 2001 From: Vincent <97131062+vincbeck@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:22:51 -0500 Subject: [PATCH 02/25] Fix main (#44747) --- airflow/ui/src/layouts/Nav/Nav.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airflow/ui/src/layouts/Nav/Nav.tsx b/airflow/ui/src/layouts/Nav/Nav.tsx index c17e57609061f..f716d612e8af4 100644 --- a/airflow/ui/src/layouts/Nav/Nav.tsx +++ b/airflow/ui/src/layouts/Nav/Nav.tsx @@ -20,7 +20,6 @@ import { Box, Flex, VStack, Link } from "@chakra-ui/react"; import { FiCornerUpLeft, FiDatabase, FiHome, FiSettings } from "react-icons/fi"; import { useVersionServiceGetVersion } from "openapi/queries"; - import { AirflowPin } from "src/assets/AirflowPin"; import { DagIcon } from "src/assets/DagIcon"; @@ -89,4 +88,4 @@ export const Nav = () => { ); -} +}; From 696cb29f3bb5f0a1a947d183688cbd4f30bfeb3f Mon Sep 17 00:00:00 2001 From: Pratiksha <128999446+Prab-27@users.noreply.github.com> Date: Sat, 7 Dec 2024 02:52:37 +0530 Subject: [PATCH 03/25] Removed deprecated code from hashicorp provider (#44598) * Remove deprecated code from HashiCorp provider * Remove additional deprecated code from hashicorp provider * remove test code for deprecated method * fix if clause for approle authentication * modified changelog --------- Co-authored-by: pratiksha rajendrabhai badheka --- .../airflow/providers/hashicorp/CHANGELOG.rst | 12 + .../providers/hashicorp/hooks/vault.py | 25 +- .../providers/hashicorp/secrets/vault.py | 19 -- .../tests/hashicorp/secrets/test_vault.py | 216 ------------------ 4 files changed, 15 insertions(+), 257 deletions(-) diff --git a/providers/src/airflow/providers/hashicorp/CHANGELOG.rst b/providers/src/airflow/providers/hashicorp/CHANGELOG.rst index 032f9d565c85a..0c80950db363f 100644 --- a/providers/src/airflow/providers/hashicorp/CHANGELOG.rst +++ b/providers/src/airflow/providers/hashicorp/CHANGELOG.rst @@ -27,6 +27,18 @@ Changelog --------- +main +..... + +.. warning:: + All deprecated classes, parameters and features have been removed from the hashicorp provider package. 
+    The following breaking changes were introduced:
+
+    * The usage of ``role_id`` for AppRole authentication has been removed from ``airflow.providers.hashicorp.hooks.vault``. Please use connection login
+    * The usage of ``role_id`` in connection extra for AppRole authentication has been removed from ``airflow.providers.hashicorp.hooks.vault``. Please use connection login
+    * Removed ``role_id`` from ``get_connection_form_widgets``
+    * Removed deprecated method ``VaultBackend.get_conn_uri`` from ``airflow.providers.hashicorp.secrets.vault``
+
 3.8.0
 .....
 
diff --git a/providers/src/airflow/providers/hashicorp/hooks/vault.py b/providers/src/airflow/providers/hashicorp/hooks/vault.py
index ccee598d6cc14..37e409f388af2 100644
--- a/providers/src/airflow/providers/hashicorp/hooks/vault.py
+++ b/providers/src/airflow/providers/hashicorp/hooks/vault.py
@@ -19,12 +19,10 @@
 from __future__ import annotations
 
 import json
-import warnings
 from typing import TYPE_CHECKING, Any
 
 from hvac.exceptions import VaultError
 
-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.hooks.base import BaseHook
 from airflow.providers.hashicorp._internal_client.vault_client import (
     DEFAULT_KUBERNETES_JWT_PATH,
@@ -70,7 +68,7 @@ class VaultHook(BaseHook):
 
     Login/Password are used as credentials:
 
-        * approle: login -> role_id, password -> secret_id
+        * approle: login -> connection.login
         * github: password -> token
         * token: password -> token
         * aws_iam: login -> key_id, password -> secret_id
@@ -147,24 +145,8 @@ def __init__(
         if kwargs:
             client_kwargs = merge_dicts(client_kwargs, kwargs)
 
-        if auth_type == "approle":
-            if role_id:
-                warnings.warn(
-                    """The usage of role_id for AppRole authentication has been deprecated.
-                    Please use connection login.""",
-                    AirflowProviderDeprecationWarning,
-                    stacklevel=2,
-                )
-            elif self.connection.extra_dejson.get("role_id"):
-                role_id = self.connection.extra_dejson.get("role_id")
-                warnings.warn(
-                    """The usage of role_id in connection extra for AppRole authentication has been
-                    deprecated. 
Please use connection login.""", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - elif self.connection.login: - role_id = self.connection.login + if auth_type == "approle" and self.connection.login: + role_id = self.connection.login if auth_type == "aws_iam": if not role_id: @@ -385,7 +367,6 @@ def get_connection_form_widgets(cls) -> dict[str, Any]: description="Must be 1 or 2.", default=DEFAULT_KV_ENGINE_VERSION, ), - "role_id": StringField(lazy_gettext("Role ID (deprecated)"), widget=BS3TextFieldWidget()), "kubernetes_role": StringField(lazy_gettext("Kubernetes role"), widget=BS3TextFieldWidget()), "kubernetes_jwt_path": StringField( lazy_gettext("Kubernetes jwt path"), widget=BS3TextFieldWidget() diff --git a/providers/src/airflow/providers/hashicorp/secrets/vault.py b/providers/src/airflow/providers/hashicorp/secrets/vault.py index 2591c77652e26..275c0dc79baa0 100644 --- a/providers/src/airflow/providers/hashicorp/secrets/vault.py +++ b/providers/src/airflow/providers/hashicorp/secrets/vault.py @@ -21,9 +21,6 @@ from typing import TYPE_CHECKING -from deprecated import deprecated - -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.hashicorp._internal_client.vault_client import _VaultClient from airflow.secrets import BaseSecretsBackend from airflow.utils.log.logging_mixin import LoggingMixin @@ -191,22 +188,6 @@ def get_response(self, conn_id: str) -> dict | None: secret_path=(mount_point + "/" if mount_point else "") + secret_path ) - @deprecated( - reason="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - category=AirflowProviderDeprecationWarning, - ) - def get_conn_uri(self, conn_id: str) -> str | None: - """ - Get serialized representation of connection. - - :param conn_id: The connection id - :return: The connection uri retrieved from the secret - """ - # Since VaultBackend implements `get_connection`, `get_conn_uri` is not used. 
So we - # don't need to implement (or direct users to use) method `get_conn_value` instead - response = self.get_response(conn_id) - return response.get("conn_uri") if response else None - # Make sure connection is imported this way for type checking, otherwise when importing # the backend it will get a circular dependency and fail if TYPE_CHECKING: diff --git a/providers/tests/hashicorp/secrets/test_vault.py b/providers/tests/hashicorp/secrets/test_vault.py index 14834e2cbd07a..8b5e1b3cfc4df 100644 --- a/providers/tests/hashicorp/secrets/test_vault.py +++ b/providers/tests/hashicorp/secrets/test_vault.py @@ -21,89 +21,10 @@ import pytest from hvac.exceptions import InvalidPath, VaultError -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.hashicorp.secrets.vault import VaultBackend class TestVaultSecrets: - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") - def test_get_conn_uri(self, mock_hvac): - mock_client = mock.MagicMock() - mock_hvac.Client.return_value = mock_client - mock_client.secrets.kv.v2.read_secret_version.return_value = { - "request_id": "94011e25-f8dc-ec29-221b-1f9c1d9ad2ae", - "lease_id": "", - "renewable": False, - "lease_duration": 0, - "data": { - "data": {"conn_uri": "postgresql://airflow:airflow@host:5432/airflow"}, - "metadata": { - "created_time": "2020-03-16T21:01:43.331126Z", - "deletion_time": "", - "destroyed": False, - "version": 1, - }, - }, - "wrap_info": None, - "warnings": None, - "auth": None, - } - - kwargs = { - "connections_path": "connections", - "mount_point": "airflow", - "auth_type": "token", - "url": "http://127.0.0.1:8200", - "token": "s.7AU0I51yv1Q1lxOIg1F3ZRAS", - } - - test_client = VaultBackend(**kwargs) - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - ): - returned_uri = test_client.get_conn_uri(conn_id="test_postgres") - assert returned_uri == "postgresql://airflow:airflow@host:5432/airflow" - - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") - def test_get_conn_uri_without_predefined_mount_point(self, mock_hvac): - mock_client = mock.MagicMock() - mock_hvac.Client.return_value = mock_client - mock_client.secrets.kv.v2.read_secret_version.return_value = { - "request_id": "94011e25-f8dc-ec29-221b-1f9c1d9ad2ae", - "lease_id": "", - "renewable": False, - "lease_duration": 0, - "data": { - "data": {"conn_uri": "postgresql://airflow:airflow@host:5432/airflow"}, - "metadata": { - "created_time": "2020-03-16T21:01:43.331126Z", - "deletion_time": "", - "destroyed": False, - "version": 1, - }, - }, - "wrap_info": None, - "warnings": None, - "auth": None, - } - - kwargs = { - "connections_path": "connections", - "mount_point": None, - "auth_type": "token", - "url": "http://127.0.0.1:8200", - "token": "s.7AU0I51yv1Q1lxOIg1F3ZRAS", - } - - test_client = VaultBackend(**kwargs) - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - ): - returned_uri = test_client.get_conn_uri(conn_id="airflow/test_postgres") - assert returned_uri == "postgresql://airflow:airflow@host:5432/airflow" - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") def test_get_connection(self, mock_hvac): mock_client = mock.MagicMock() @@ -190,143 +111,6 @@ def test_get_connection_without_predefined_mount_point(self, mock_hvac): connection = 
test_client.get_connection(conn_id="airflow/test_postgres") assert connection.get_uri() == "postgresql://airflow:airflow@host:5432/airflow?foo=bar&baz=taz" - @pytest.mark.parametrize( - "mount_point, connections_path, conn_id, expected_args", - [ - ( - "airflow", - "connections", - "test_postgres", - {"mount_point": "airflow", "path": "connections/test_postgres"}, - ), - ( - "airflow", - "", - "path/to/connections/test_postgres", - {"mount_point": "airflow", "path": "path/to/connections/test_postgres"}, - ), - ( - None, - "connections", - "airflow/test_postgres", - {"mount_point": "airflow", "path": "connections/test_postgres"}, - ), - ( - None, - "", - "airflow/path/to/connections/test_postgres", - {"mount_point": "airflow", "path": "path/to/connections/test_postgres"}, - ), - ], - ) - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") - def test_get_conn_uri_engine_version_1( - self, mock_hvac, mount_point, connections_path, conn_id, expected_args - ): - mock_client = mock.MagicMock() - mock_hvac.Client.return_value = mock_client - mock_client.secrets.kv.v1.read_secret.return_value = { - "request_id": "182d0673-618c-9889-4cba-4e1f4cfe4b4b", - "lease_id": "", - "renewable": False, - "lease_duration": 2764800, - "data": {"conn_uri": "postgresql://airflow:airflow@host:5432/airflow"}, - "wrap_info": None, - "warnings": None, - "auth": None, - } - - kwargs = { - "connections_path": connections_path, - "mount_point": mount_point, - "auth_type": "token", - "url": "http://127.0.0.1:8200", - "token": "s.7AU0I51yv1Q1lxOIg1F3ZRAS", - "kv_engine_version": 1, - } - - test_client = VaultBackend(**kwargs) - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - ): - returned_uri = test_client.get_conn_uri(conn_id=conn_id) - mock_client.secrets.kv.v1.read_secret.assert_called_once_with(**expected_args) - assert returned_uri == "postgresql://airflow:airflow@host:5432/airflow" - - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") - def test_get_conn_uri_engine_version_1_custom_auth_mount_point(self, mock_hvac): - mock_client = mock.MagicMock() - mock_hvac.Client.return_value = mock_client - mock_client.secrets.kv.v1.read_secret.return_value = { - "request_id": "182d0673-618c-9889-4cba-4e1f4cfe4b4b", - "lease_id": "", - "renewable": False, - "lease_duration": 2764800, - "data": {"conn_uri": "postgresql://airflow:airflow@host:5432/airflow"}, - "wrap_info": None, - "warnings": None, - "auth": None, - } - - kwargs = { - "connections_path": "connections", - "mount_point": "airflow", - "auth_mount_point": "custom", - "auth_type": "token", - "url": "http://127.0.0.1:8200", - "token": "s.7AU0I51yv1Q1lxOIg1F3ZRAS", - "kv_engine_version": 1, - } - - test_client = VaultBackend(**kwargs) - assert test_client.vault_client.auth_mount_point == "custom" - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - ): - returned_uri = test_client.get_conn_uri(conn_id="test_postgres") - mock_client.secrets.kv.v1.read_secret.assert_called_once_with( - mount_point="airflow", path="connections/test_postgres" - ) - assert returned_uri == "postgresql://airflow:airflow@host:5432/airflow" - - @mock.patch.dict( - "os.environ", - { - "AIRFLOW_CONN_TEST_MYSQL": "mysql://airflow:airflow@host:5432/airflow", - }, - ) - 
@mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") - def test_get_conn_uri_non_existent_key(self, mock_hvac): - """ - Test that if the key with connection ID is not present in Vault, _VaultClient.get_connection - should return None - """ - mock_client = mock.MagicMock() - mock_hvac.Client.return_value = mock_client - # Response does not contain the requested key - mock_client.secrets.kv.v2.read_secret_version.side_effect = InvalidPath() - - kwargs = { - "connections_path": "connections", - "mount_point": "airflow", - "auth_type": "token", - "url": "http://127.0.0.1:8200", - "token": "s.7AU0I51yv1Q1lxOIg1F3ZRAS", - } - - test_client = VaultBackend(**kwargs) - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Method `VaultBackend.get_conn_uri` is deprecated and will be removed in a future release.", - ): - assert test_client.get_conn_uri(conn_id="test_mysql") is None - mock_client.secrets.kv.v2.read_secret_version.assert_called_once_with( - mount_point="airflow", path="connections/test_mysql", version=None, raise_on_deleted_version=True - ) - assert test_client.get_connection(conn_id="test_mysql") is None - @mock.patch("airflow.providers.hashicorp._internal_client.vault_client.hvac") def test_get_variable_value(self, mock_hvac): mock_client = mock.MagicMock() From 1ff9fe80103c454e5ba94402aceaf9a1727e345b Mon Sep 17 00:00:00 2001 From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Date: Fri, 6 Dec 2024 22:50:16 +0100 Subject: [PATCH 04/25] Remove Provider Deprecations in Oracle (#44704) * Remove Provider Deprecations in Oracle * Remove docs from removed operator and examples --- .../operators/index.rst | 14 ------- .../airflow/providers/oracle/CHANGELOG.rst | 15 +++++++ .../oracle/example_dags/example_oracle.py | 9 ----- .../airflow/providers/oracle/hooks/oracle.py | 9 ----- .../providers/oracle/operators/oracle.py | 40 +------------------ .../tests/oracle/operators/test_oracle.py | 32 +-------------- 6 files changed, 17 insertions(+), 102 deletions(-) diff --git a/docs/apache-airflow-providers-oracle/operators/index.rst b/docs/apache-airflow-providers-oracle/operators/index.rst index 990cb2e6503b2..0a09991085e33 100644 --- a/docs/apache-airflow-providers-oracle/operators/index.rst +++ b/docs/apache-airflow-providers-oracle/operators/index.rst @@ -22,20 +22,6 @@ Oracle Operators ================ The Oracle connection type provides connection to a Oracle database. -Execute SQL in an Oracle database ---------------------------------- - -To execute arbitrary SQL in an Oracle database, use the -:class:`~airflow.providers.oracle.operators.oracle.OracleOperator`. - -An example of executing a simple query is as follows: - -.. exampleinclude:: /../../providers/src/airflow/providers/oracle/example_dags/example_oracle.py - :language: python - :start-after: [START howto_oracle_operator] - :end-before: [END howto_oracle_operator] - - Execute a Stored Procedure in an Oracle database ------------------------------------------------ diff --git a/providers/src/airflow/providers/oracle/CHANGELOG.rst b/providers/src/airflow/providers/oracle/CHANGELOG.rst index ac2306379a700..394b8af9c14d6 100644 --- a/providers/src/airflow/providers/oracle/CHANGELOG.rst +++ b/providers/src/airflow/providers/oracle/CHANGELOG.rst @@ -27,6 +27,21 @@ Changelog --------- +main +.... + +Breaking changes +~~~~~~~~~~~~~~~~ + +.. warning:: + All deprecated classes, parameters and features have been removed from the Oracle provider package. 
+ The following breaking changes were introduced: + + * Hooks + * Remove deprecated support setting the Oracle Service Name using ``conn.schema``. Please use ``conn.extra.service_name`` instead. + * Operators + * Remove ``airflow.providers.oracle.operators.oracle.OracleOperator``. Please use ``airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`` instead. + 3.12.1 ...... diff --git a/providers/src/airflow/providers/oracle/example_dags/example_oracle.py b/providers/src/airflow/providers/oracle/example_dags/example_oracle.py index a68c0be6c882d..5501831e946ee 100644 --- a/providers/src/airflow/providers/oracle/example_dags/example_oracle.py +++ b/providers/src/airflow/providers/oracle/example_dags/example_oracle.py @@ -19,7 +19,6 @@ from datetime import datetime from airflow import DAG -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator from airflow.providers.oracle.operators.oracle import OracleStoredProcedureOperator with DAG( @@ -29,14 +28,6 @@ start_date=datetime(2023, 1, 1), dag_id="example_oracle", ) as dag: - # [START howto_oracle_operator] - - opr_sql = SQLExecuteQueryOperator( - task_id="task_sql", conn_id="oracle", sql="SELECT 1 FROM DUAL", autocommit=True - ) - - # [END howto_oracle_operator] - # [START howto_oracle_stored_procedure_operator_with_list_inout] opr_stored_procedure_with_list_input_output = OracleStoredProcedureOperator( diff --git a/providers/src/airflow/providers/oracle/hooks/oracle.py b/providers/src/airflow/providers/oracle/hooks/oracle.py index 192a2da4b1a64..f09cbc0d479e1 100644 --- a/providers/src/airflow/providers/oracle/hooks/oracle.py +++ b/providers/src/airflow/providers/oracle/hooks/oracle.py @@ -23,7 +23,6 @@ import oracledb -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.common.sql.hooks.sql import DbApiHook PARAM_TYPES = {bool, float, int, str} @@ -197,14 +196,6 @@ def get_conn(self) -> oracledb.Connection: dsn += f":{conn.port}" if service_name: dsn += f"/{service_name}" - elif conn.schema: - warnings.warn( - """Using conn.schema to pass the Oracle Service Name is deprecated. - Please use conn.extra.service_name instead.""", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - dsn += f"/{conn.schema}" conn_config["dsn"] = dsn if "events" in conn.extra_dejson: diff --git a/providers/src/airflow/providers/oracle/operators/oracle.py b/providers/src/airflow/providers/oracle/operators/oracle.py index 60da9fcb70dad..9e1247262ffaf 100644 --- a/providers/src/airflow/providers/oracle/operators/oracle.py +++ b/providers/src/airflow/providers/oracle/operators/oracle.py @@ -19,55 +19,17 @@ import re from collections.abc import Sequence -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING import oracledb -from deprecated import deprecated -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import BaseOperator -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator from airflow.providers.oracle.hooks.oracle import OracleHook if TYPE_CHECKING: from airflow.utils.context import Context -@deprecated( - reason="Please use `airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`.", - category=AirflowProviderDeprecationWarning, -) -class OracleOperator(SQLExecuteQueryOperator): - """ - Executes sql code in a specific Oracle database. - - This class is deprecated. - - Please use :class:`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. 
- - :param sql: the sql code to be executed. Can receive a str representing a sql statement, - a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' - (templated) - :param oracle_conn_id: The :ref:`Oracle connection id ` - reference to a specific Oracle database. - :param parameters: (optional, templated) the parameters to render the SQL query with. - :param autocommit: if True, each command is automatically committed. - (default value: False) - """ - - template_fields: Sequence[str] = ( - "parameters", - "sql", - ) - template_ext: Sequence[str] = (".sql",) - template_fields_renderers: ClassVar[dict] = {"sql": "sql"} - ui_color = "#ededed" - - def __init__(self, *, oracle_conn_id: str = "oracle_default", **kwargs) -> None: - super().__init__(conn_id=oracle_conn_id, **kwargs) - - class OracleStoredProcedureOperator(BaseOperator): """ Executes stored procedure in a specific Oracle database. diff --git a/providers/tests/oracle/operators/test_oracle.py b/providers/tests/oracle/operators/test_oracle.py index 623e214efc3d0..2f06e1513472a 100644 --- a/providers/tests/oracle/operators/test_oracle.py +++ b/providers/tests/oracle/operators/test_oracle.py @@ -23,39 +23,9 @@ import oracledb import pytest -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import TaskInstance -from airflow.providers.common.sql.hooks.sql import fetch_all_handler from airflow.providers.oracle.hooks.oracle import OracleHook -from airflow.providers.oracle.operators.oracle import OracleOperator, OracleStoredProcedureOperator - - -class TestOracleOperator: - @mock.patch("airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator.get_db_hook") - def test_execute(self, mock_get_db_hook): - sql = "SELECT * FROM test_table" - oracle_conn_id = "oracle_default" - parameters = {"parameter": "value"} - autocommit = False - context = "test_context" - task_id = "test_task_id" - - with pytest.warns(AirflowProviderDeprecationWarning, match="Call to deprecated class *"): - operator = OracleOperator( - sql=sql, - oracle_conn_id=oracle_conn_id, - parameters=parameters, - autocommit=autocommit, - task_id=task_id, - ) - operator.execute(context=context) - mock_get_db_hook.return_value.run.assert_called_once_with( - sql=sql, - autocommit=autocommit, - parameters=parameters, - handler=fetch_all_handler, - return_last=True, - ) +from airflow.providers.oracle.operators.oracle import OracleStoredProcedureOperator class TestOracleStoredProcedureOperator: From ee6f6fbbb1a04ed38f9269193cfdfb2b36866e72 Mon Sep 17 00:00:00 2001 From: LIU ZHE YOU <68415893+jason810496@users.noreply.github.com> Date: Sat, 7 Dec 2024 06:16:13 +0800 Subject: [PATCH 05/25] Remove Provider Deprecations in Redis (#44633) * Remove Provider Deprecations in Redis * Fix mock_get_connection in test_redis --- .../src/airflow/providers/redis/CHANGELOG.rst | 11 +++ .../airflow/providers/redis/hooks/redis.py | 14 +--- providers/tests/redis/hooks/test_redis.py | 74 ++++--------------- 3 files changed, 28 insertions(+), 71 deletions(-) diff --git a/providers/src/airflow/providers/redis/CHANGELOG.rst b/providers/src/airflow/providers/redis/CHANGELOG.rst index 2f53fb18b0c32..715553e757f25 100644 --- a/providers/src/airflow/providers/redis/CHANGELOG.rst +++ b/providers/src/airflow/providers/redis/CHANGELOG.rst @@ -27,6 +27,17 @@ Changelog --------- +main +..... + +.. warning:: + All deprecated classes, parameters and features have been removed from the Redis provider package. 
+ The following breaking changes were introduced: + + * Hooks + + * Removed ``ssl_cert_file`` parameter from ``RedisHook``. Use ``ssl_certfile`` instead + 3.8.0 ..... diff --git a/providers/src/airflow/providers/redis/hooks/redis.py b/providers/src/airflow/providers/redis/hooks/redis.py index 10890f36b0eb1..2f956a5cb137e 100644 --- a/providers/src/airflow/providers/redis/hooks/redis.py +++ b/providers/src/airflow/providers/redis/hooks/redis.py @@ -19,12 +19,10 @@ from __future__ import annotations -import warnings from typing import Any from redis import Redis -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.hooks.base import BaseHook DEFAULT_SSL_CERT_REQS = "required" @@ -37,7 +35,7 @@ class RedisHook(BaseHook): You can set your db in the extra field of your connection as ``{"db": 3}``. Also you can set ssl parameters as: - ``{"ssl": true, "ssl_cert_reqs": "require", "ssl_cert_file": "/path/to/cert.pem", etc}``. + ``{"ssl": true, "ssl_cert_reqs": "require", "ssl_certfile": "/path/to/cert.pem", etc}``. """ conn_name_attr = "redis_conn_id" @@ -81,16 +79,6 @@ def get_conn(self): ] ssl_args = {name: val for name, val in conn.extra_dejson.items() if name in ssl_arg_names} - # This logic is for backward compatibility only - if "ssl_cert_file" in conn.extra_dejson and "ssl_certfile" not in conn.extra_dejson: - warnings.warn( - "Extra parameter `ssl_cert_file` deprecated and will be removed " - "in a future release. Please use `ssl_certfile` instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - ssl_args["ssl_certfile"] = conn.extra_dejson.get("ssl_cert_file") - if not self.redis: self.log.debug( 'Initializing redis object for conn_id "%s" on %s:%s:%s', diff --git a/providers/tests/redis/hooks/test_redis.py b/providers/tests/redis/hooks/test_redis.py index 1216dc6b64131..b64ffb909d241 100644 --- a/providers/tests/redis/hooks/test_redis.py +++ b/providers/tests/redis/hooks/test_redis.py @@ -21,7 +21,6 @@ import pytest -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import Connection from airflow.providers.redis.hooks.redis import RedisHook @@ -29,11 +28,6 @@ class TestRedisHook: - deprecation_message = ( - "Extra parameter `ssl_cert_file` deprecated and will be removed " - "in a future release. Please use `ssl_certfile` instead." 
- ) - def test_get_conn(self): hook = RedisHook(redis_conn_id="redis_default") assert hook.redis is None @@ -47,24 +41,26 @@ def test_get_conn(self): @mock.patch("airflow.providers.redis.hooks.redis.Redis") @mock.patch( "airflow.providers.redis.hooks.redis.RedisHook.get_connection", - return_value=Connection( + ) + def test_get_conn_with_extra_config(self, mock_get_connection, mock_redis): + connection = Connection( login="user", password="password", host="remote_host", port=1234, - extra="""{ - "db": 2, - "ssl": true, - "ssl_cert_reqs": "required", - "ssl_ca_certs": "/path/to/custom/ca-cert", - "ssl_keyfile": "/path/to/key-file", - "ssl_certfile": "/path/to/cert-file", - "ssl_check_hostname": true - }""", - ), - ) - def test_get_conn_with_extra_config(self, mock_get_connection, mock_redis): - connection = mock_get_connection.return_value + ) + connection.set_extra( + """{ + "db": 2, + "ssl": true, + "ssl_cert_reqs": "required", + "ssl_ca_certs": "/path/to/custom/ca-cert", + "ssl_keyfile": "/path/to/key-file", + "ssl_certfile": "/path/to/cert-file", + "ssl_check_hostname": true + }""" + ) + mock_get_connection.return_value = connection hook = RedisHook() hook.get_conn() @@ -82,44 +78,6 @@ def test_get_conn_with_extra_config(self, mock_get_connection, mock_redis): ssl_check_hostname=connection.extra_dejson["ssl_check_hostname"], ) - @mock.patch("airflow.providers.redis.hooks.redis.Redis") - @mock.patch( - "airflow.providers.redis.hooks.redis.RedisHook.get_connection", - return_value=Connection( - password="password", - host="remote_host", - port=1234, - extra="""{ - "db": 2, - "ssl": true, - "ssl_cert_reqs": "required", - "ssl_ca_certs": "/path/to/custom/ca-cert", - "ssl_keyfile": "/path/to/key-file", - "ssl_cert_file": "/path/to/cert-file", - "ssl_check_hostname": true - }""", - ), - ) - def test_get_conn_with_deprecated_extra_config(self, mock_get_connection, mock_redis): - connection = mock_get_connection.return_value - hook = RedisHook() - - with pytest.warns(AirflowProviderDeprecationWarning, match=self.deprecation_message): - hook.get_conn() - mock_redis.assert_called_once_with( - host=connection.host, - password=connection.password, - username=None, - port=connection.port, - db=connection.extra_dejson["db"], - ssl=connection.extra_dejson["ssl"], - ssl_cert_reqs=connection.extra_dejson["ssl_cert_reqs"], - ssl_ca_certs=connection.extra_dejson["ssl_ca_certs"], - ssl_keyfile=connection.extra_dejson["ssl_keyfile"], - ssl_certfile=connection.extra_dejson["ssl_cert_file"], - ssl_check_hostname=connection.extra_dejson["ssl_check_hostname"], - ) - def test_get_conn_password_stays_none(self): hook = RedisHook(redis_conn_id="redis_default") hook.get_conn() From 8bbba50d4a87f8c944465cded15f537b588076ac Mon Sep 17 00:00:00 2001 From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com> Date: Sat, 7 Dec 2024 09:32:58 +0530 Subject: [PATCH 06/25] Remove deprecations from Apache hive Provider (#44715) * remove deprecations * Update providers/src/airflow/providers/apache/hive/CHANGELOG.rst Co-authored-by: Wei Lee --------- Co-authored-by: Wei Lee --- providers/src/airflow/providers/apache/hive/CHANGELOG.rst | 8 ++++++++ providers/src/airflow/providers/apache/hive/hooks/hive.py | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/providers/src/airflow/providers/apache/hive/CHANGELOG.rst b/providers/src/airflow/providers/apache/hive/CHANGELOG.rst index 1213e305cf813..b55a36bd82cb4 100644 --- a/providers/src/airflow/providers/apache/hive/CHANGELOG.rst +++ 
b/providers/src/airflow/providers/apache/hive/CHANGELOG.rst
@@ -26,6 +26,14 @@ Changelog
 ---------
 
+main
+....
+
+.. warning::
+    All deprecated classes, parameters and features have been removed from the Apache Hive provider package.
+    The following breaking changes were introduced:
+
+    * Removed deprecated ``GSSAPI`` for ``auth_mechanism``. Use ``KERBEROS`` instead.
 
 8.2.1
 .....
 
diff --git a/providers/src/airflow/providers/apache/hive/hooks/hive.py b/providers/src/airflow/providers/apache/hive/hooks/hive.py
index d768743cd459b..dde421e01e69b 100644
--- a/providers/src/airflow/providers/apache/hive/hooks/hive.py
+++ b/providers/src/airflow/providers/apache/hive/hooks/hive.py
@@ -873,14 +873,6 @@ def get_conn(self, schema: str | None = None) -> Any:
         auth_mechanism = db.extra_dejson.get("auth_mechanism", "KERBEROS")
         kerberos_service_name = db.extra_dejson.get("kerberos_service_name", "hive")
 
-        # pyhive uses GSSAPI instead of KERBEROS as a auth_mechanism identifier
-        if auth_mechanism == "GSSAPI":
-            self.log.warning(
-                "Detected deprecated 'GSSAPI' for auth_mechanism for %s. Please use 'KERBEROS' instead",
-                self.hiveserver2_conn_id,  # type: ignore
-            )
-            auth_mechanism = "KERBEROS"
-
         # Password should be set if and only if in LDAP or CUSTOM mode
         if auth_mechanism in ("LDAP", "CUSTOM"):
             password = db.password

From ddfcd900c03b02bac22c0fd8dd51736ce6e2f586 Mon Sep 17 00:00:00 2001
From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com>
Date: Sat, 7 Dec 2024 07:36:00 +0100
Subject: [PATCH 07/25] Remove Provider Deprecations in Vertica (#44748)

---
 .../airflow/providers/vertica/CHANGELOG.rst   | 13 +++++
 .../providers/vertica/operators/__init__.py   | 17 ------
 .../providers/vertica/operators/vertica.py    | 53 -------------------
 .../airflow/providers/vertica/provider.yaml   |  5 --
 providers/tests/vertica/operators/__init__.py | 17 ------
 .../tests/vertica/operators/test_vertica.py   | 47 ----------------
 6 files changed, 13 insertions(+), 139 deletions(-)
 delete mode 100644 providers/src/airflow/providers/vertica/operators/__init__.py
 delete mode 100644 providers/src/airflow/providers/vertica/operators/vertica.py
 delete mode 100644 providers/tests/vertica/operators/__init__.py
 delete mode 100644 providers/tests/vertica/operators/test_vertica.py

diff --git a/providers/src/airflow/providers/vertica/CHANGELOG.rst b/providers/src/airflow/providers/vertica/CHANGELOG.rst
index 1329aa22700f0..d36d621dc02f7 100644
--- a/providers/src/airflow/providers/vertica/CHANGELOG.rst
+++ b/providers/src/airflow/providers/vertica/CHANGELOG.rst
@@ -28,6 +28,19 @@ Changelog
 ---------
 
+main
+....
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+.. warning::
+    All deprecated classes, parameters and features have been removed from the Vertica provider package.
+    The following breaking changes were introduced:
+
+    * Operators
+       * Remove ``airflow.providers.vertica.operators.vertica.VerticaOperator``. Please use ``airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator``.
+
 3.9.1
 .....
 
diff --git a/providers/src/airflow/providers/vertica/operators/__init__.py b/providers/src/airflow/providers/vertica/operators/__init__.py
deleted file mode 100644
index 217e5db960782..0000000000000
--- a/providers/src/airflow/providers/vertica/operators/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/src/airflow/providers/vertica/operators/vertica.py b/providers/src/airflow/providers/vertica/operators/vertica.py deleted file mode 100644 index 1077fbaa0ee58..0000000000000 --- a/providers/src/airflow/providers/vertica/operators/vertica.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from collections.abc import Sequence -from typing import Any, ClassVar - -from deprecated import deprecated - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator - - -@deprecated( - reason="Please use `airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`.", - category=AirflowProviderDeprecationWarning, -) -class VerticaOperator(SQLExecuteQueryOperator): - """ - Executes sql code in a specific Vertica database. - - This class is deprecated. - - Please use :class:`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. - - :param vertica_conn_id: reference to a specific Vertica database - :param sql: the SQL code to be executed as a single string, or - a list of str (sql statements), or a reference to a template file. 
- Template references are recognized by str ending in '.sql' - """ - - template_fields: Sequence[str] = ("sql",) - template_ext: Sequence[str] = (".sql",) - template_fields_renderers: ClassVar[dict] = {"sql": "sql"} - ui_color = "#b4e0ff" - - def __init__(self, *, vertica_conn_id: str = "vertica_default", **kwargs: Any) -> None: - super().__init__(conn_id=vertica_conn_id, **kwargs) diff --git a/providers/src/airflow/providers/vertica/provider.yaml b/providers/src/airflow/providers/vertica/provider.yaml index 564f26f1c91f5..8e55440be6fd1 100644 --- a/providers/src/airflow/providers/vertica/provider.yaml +++ b/providers/src/airflow/providers/vertica/provider.yaml @@ -64,11 +64,6 @@ integrations: logo: /integration-logos/vertica/Vertica.png tags: [software] -operators: - - integration-name: Vertica - python-modules: - - airflow.providers.vertica.operators.vertica - hooks: - integration-name: Vertica python-modules: diff --git a/providers/tests/vertica/operators/__init__.py b/providers/tests/vertica/operators/__init__.py deleted file mode 100644 index 217e5db960782..0000000000000 --- a/providers/tests/vertica/operators/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/tests/vertica/operators/test_vertica.py b/providers/tests/vertica/operators/test_vertica.py deleted file mode 100644 index 0668a7304e384..0000000000000 --- a/providers/tests/vertica/operators/test_vertica.py +++ /dev/null @@ -1,47 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import re -from unittest import mock - -import pytest - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.sql.hooks.sql import fetch_all_handler -from airflow.providers.vertica.operators.vertica import VerticaOperator - - -class TestVerticaOperator: - @mock.patch("airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator.get_db_hook") - def test_execute(self, mock_get_db_hook): - sql = "select a, b, c" - warning_message = re.escape( - "Please use `airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`" - ) - with pytest.warns(AirflowProviderDeprecationWarning, match=warning_message): - op = VerticaOperator(task_id="test_task_id", sql=sql) - op.execute({}) - - mock_get_db_hook.return_value.run.assert_called_once_with( - sql=sql, - autocommit=False, - handler=fetch_all_handler, - parameters=None, - return_last=True, - ) From c894dab20136ca11f096effacd5c58b794129d79 Mon Sep 17 00:00:00 2001 From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com> Date: Sat, 7 Dec 2024 12:37:38 +0530 Subject: [PATCH 08/25] Remove deprecations from Teradata Provider (#44746) --- .../airflow/providers/teradata/CHANGELOG.rst | 10 ++++ .../providers/teradata/hooks/teradata.py | 36 ------------ .../transfers/teradata_to_teradata.py | 2 +- .../tests/teradata/hooks/test_teradata.py | 55 ------------------- .../transfers/test_teradata_to_teradata.py | 2 +- 5 files changed, 12 insertions(+), 93 deletions(-) diff --git a/providers/src/airflow/providers/teradata/CHANGELOG.rst b/providers/src/airflow/providers/teradata/CHANGELOG.rst index 622661ab26ae0..b8a9141385ef4 100644 --- a/providers/src/airflow/providers/teradata/CHANGELOG.rst +++ b/providers/src/airflow/providers/teradata/CHANGELOG.rst @@ -25,6 +25,16 @@ Changelog --------- +main +.... + +.. warning:: + All deprecated classes, parameters and features have been removed from the teradata provider package. + The following breaking changes were introduced: + + * Removed deprecated ``bulk_insert_rows`` method from ``hooks``. Use ``insert_rows`` instead. + + 2.6.1 ..... diff --git a/providers/src/airflow/providers/teradata/hooks/teradata.py b/providers/src/airflow/providers/teradata/hooks/teradata.py index f4e6348ad0f3f..0742bc374369d 100644 --- a/providers/src/airflow/providers/teradata/hooks/teradata.py +++ b/providers/src/airflow/providers/teradata/hooks/teradata.py @@ -20,14 +20,12 @@ from __future__ import annotations import re -import warnings from typing import TYPE_CHECKING, Any import sqlalchemy import teradatasql from teradatasql import TeradataConnection -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.common.sql.hooks.sql import DbApiHook if TYPE_CHECKING: @@ -160,40 +158,6 @@ def set_query_band(self, query_band_text, teradata_conn): except Exception as ex: self.log.error("Error occurred while setting session query band: %s ", str(ex)) - def bulk_insert_rows( - self, - table: str, - rows: list[tuple], - target_fields: list[str] | None = None, - commit_every: int = 5000, - ): - """ - Use :func:`insert_rows` instead, this is deprecated. - - Insert bulk of records into Teradata SQL Database. - - This uses prepared statements via `executemany()`. For best performance, - pass in `rows` as an iterator. 
- - :param table: target Teradata database table, use dot notation to target a - specific database - :param rows: the rows to insert into the table - :param target_fields: the names of the columns to fill in the table, default None. - If None, each row should have some order as table columns name - :param commit_every: the maximum number of rows to insert in one transaction - Default 5000. Set greater than 0. Set 1 to insert each row in each transaction - """ - warnings.warn( - "bulk_insert_rows is deprecated. Please use the insert_rows method instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - - if not rows: - raise ValueError("parameter rows could not be None or empty iterable") - - self.insert_rows(table=table, rows=rows, target_fields=target_fields, commit_every=commit_every) - def _get_conn_config_teradatasql(self) -> dict[str, Any]: """Return set of config params required for connecting to Teradata DB using teradatasql client.""" conn: Connection = self.get_connection(self.get_conn_id()) diff --git a/providers/src/airflow/providers/teradata/transfers/teradata_to_teradata.py b/providers/src/airflow/providers/teradata/transfers/teradata_to_teradata.py index f10eb6d10048f..077ce097aed5d 100644 --- a/providers/src/airflow/providers/teradata/transfers/teradata_to_teradata.py +++ b/providers/src/airflow/providers/teradata/transfers/teradata_to_teradata.py @@ -91,7 +91,7 @@ def execute(self, context: Context) -> None: rows_total = 0 if len(target_fields) != 0: for rows in iter(lambda: cursor.fetchmany(self.rows_chunk), []): - dest_hook.bulk_insert_rows( + dest_hook.insert_rows( self.destination_table, rows, target_fields=target_fields, diff --git a/providers/tests/teradata/hooks/test_teradata.py b/providers/tests/teradata/hooks/test_teradata.py index 21a19308b2da7..f2bcb1ee269ba 100644 --- a/providers/tests/teradata/hooks/test_teradata.py +++ b/providers/tests/teradata/hooks/test_teradata.py @@ -21,9 +21,6 @@ from datetime import datetime from unittest import mock -import pytest - -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import Connection from airflow.providers.teradata.hooks.teradata import TeradataHook, _handle_user_query_band_text @@ -232,58 +229,6 @@ def test_insert_rows(self): [("'test_string", None, "2023-08-15T00:00:00", "1", "3.14", "str")], ) - def test_bulk_insert_rows_with_fields(self): - rows = [(1, 2, 3), (4, 5, 6), (7, 8, 9)] - target_fields = ["col1", "col2", "col3"] - with pytest.warns( - AirflowProviderDeprecationWarning, - match="bulk_insert_rows is deprecated. Please use the insert_rows method instead.", - ): - self.test_db_hook.bulk_insert_rows("table", rows, target_fields) - self.cur.executemany.assert_called_once_with( - "INSERT INTO table (col1, col2, col3) VALUES (?,?,?)", - [("1", "2", "3"), ("4", "5", "6"), ("7", "8", "9")], - ) - - def test_bulk_insert_rows_with_commit_every(self): - rows = [(1, 2, 3), (4, 5, 6), (7, 8, 9)] - target_fields = ["col1", "col2", "col3"] - with pytest.warns( - AirflowProviderDeprecationWarning, - match="bulk_insert_rows is deprecated. 
Please use the insert_rows method instead.", - ): - self.test_db_hook.bulk_insert_rows("table", rows, target_fields, commit_every=2) - calls = [ - mock.call( - "INSERT INTO table (col1, col2, col3) VALUES (?,?,?)", [("1", "2", "3"), ("4", "5", "6")] - ), - mock.call("INSERT INTO table (col1, col2, col3) VALUES (?,?,?)", [("7", "8", "9")]), - ] - self.cur.executemany.assert_has_calls(calls, any_order=True) - - def test_bulk_insert_rows_without_fields(self): - rows = [(1, 2, 3), (4, 5, 6), (7, 8, 9)] - with pytest.warns( - AirflowProviderDeprecationWarning, - match="bulk_insert_rows is deprecated. Please use the insert_rows method instead.", - ): - self.test_db_hook.bulk_insert_rows("table", rows) - self.cur.executemany.assert_called_once_with( - "INSERT INTO table VALUES (?,?,?)", - [("1", "2", "3"), ("4", "5", "6"), ("7", "8", "9")], - ) - - def test_bulk_insert_rows_no_rows(self): - rows = [] - with ( - pytest.raises(ValueError), - pytest.warns( - AirflowProviderDeprecationWarning, - match="bulk_insert_rows is deprecated. Please use the insert_rows method instead.", - ), - ): - self.test_db_hook.bulk_insert_rows("table", rows) - def test_call_proc_dict(self): parameters = {"a": 1, "b": 2, "c": 3} diff --git a/providers/tests/teradata/transfers/test_teradata_to_teradata.py b/providers/tests/teradata/transfers/test_teradata_to_teradata.py index 8f20e1f571f12..023ca9e365118 100644 --- a/providers/tests/teradata/transfers/test_teradata_to_teradata.py +++ b/providers/tests/teradata/transfers/test_teradata_to_teradata.py @@ -109,7 +109,7 @@ def test_execution(self, mocked_src_hook, mocked_dest_hook): mock.call(rows_chunk), ] mock_cursor.fetchmany.assert_has_calls(calls) - mocked_dest_hook.bulk_insert_rows.assert_called_once_with( + mocked_dest_hook.insert_rows.assert_called_once_with( self.destination_table, cursor_rows, commit_every=rows_chunk, From 58bd547824097acaaa667c1e068fbbb841a6bdee Mon Sep 17 00:00:00 2001 From: Wei Lee Date: Sat, 7 Dec 2024 15:43:50 +0800 Subject: [PATCH 09/25] feat(datasets): add backward compat for DatasetAll, DatasetAny, expand_alias_to_datasets and DatasetAliasEvent (#44635) --- airflow/datasets/__init__.py | 39 ++++++++++++++++++++++++++-------- tests/datasets/test_dataset.py | 20 +++++++++++++++-- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/airflow/datasets/__init__.py b/airflow/datasets/__init__.py index d7c51a3082377..d082d6fbed854 100644 --- a/airflow/datasets/__init__.py +++ b/airflow/datasets/__init__.py @@ -24,17 +24,38 @@ # lib.) This is required by some IDEs to resolve the import paths. from __future__ import annotations +import importlib import warnings -from airflow.sdk.definitions.asset import AssetAlias as DatasetAlias, Dataset - # TODO: Remove this module in Airflow 3.2 -warnings.warn( - "Import from the airflow.dataset module is deprecated and " - "will be removed in the Airflow 3.2. 
Please import it from 'airflow.sdk.definitions.asset'.",
-    DeprecationWarning,
-    stacklevel=2,
-)
+_names_moved = {
+    "DatasetAlias": ("airflow.sdk.definitions.asset", "AssetAlias"),
+    "DatasetAll": ("airflow.sdk.definitions.asset", "AssetAll"),
+    "DatasetAny": ("airflow.sdk.definitions.asset", "AssetAny"),
+    "Dataset": ("airflow.sdk.definitions.asset", "Asset"),
+    "expand_alias_to_datasets": ("airflow.models.asset", "expand_alias_to_assets"),
+}
+
+
+def __getattr__(name: str):
+    # PEP-562: Lazy loaded attributes on python modules
+    if name not in _names_moved:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+    module_path, new_name = _names_moved[name]
+    warnings.warn(
+        f"Import 'airflow.dataset.{name}' is deprecated and "
+        f"will be removed in the Airflow 3.2. Please import it from '{module_path}.{new_name}'.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    mod = importlib.import_module(module_path, __name__)
+    val = getattr(mod, new_name)
+
+    # Store for next time
+    globals()[name] = val
+    return val
+

-__all__ = ["Dataset", "DatasetAlias"]
+__all__ = ["Dataset", "DatasetAlias", "DatasetAll", "DatasetAny", "expand_alias_to_datasets"]
diff --git a/tests/datasets/test_dataset.py b/tests/datasets/test_dataset.py
index 4898bd5fd47e6..44e62839fa9fb 100644
--- a/tests/datasets/test_dataset.py
+++ b/tests/datasets/test_dataset.py
@@ -36,8 +36,24 @@
         "airflow.datasets",
         "Dataset",
         (
-            "Import from the airflow.dataset module is deprecated and "
-            "will be removed in the Airflow 3.2. Please import it from 'airflow.sdk.definitions.asset'."
+            "Import 'airflow.dataset.Dataset' is deprecated and "
+            "will be removed in the Airflow 3.2. Please import it from 'airflow.sdk.definitions.asset.Asset'."
+        ),
+    ),
+    (
+        "airflow.datasets",
+        "DatasetAlias",
+        (
+            "Import 'airflow.dataset.DatasetAlias' is deprecated and "
+            "will be removed in the Airflow 3.2. Please import it from 'airflow.sdk.definitions.asset.AssetAlias'."
+        ),
+    ),
+    (
+        "airflow.datasets",
+        "expand_alias_to_datasets",
+        (
+            "Import 'airflow.dataset.expand_alias_to_datasets' is deprecated and "
+            "will be removed in the Airflow 3.2. Please import it from 'airflow.models.asset.expand_alias_to_assets'."
         ),
     ),
     (

From 929e6296eaf037afb8c4930f0a96acd14aa5c2ee Mon Sep 17 00:00:00 2001
From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com>
Date: Sat, 7 Dec 2024 13:30:23 +0530
Subject: [PATCH 10/25] Remove deprecations from Weaviate Provider (#44745)

* remove weaviate deprecations

* update provider name in change log

* fix docs
---
 .../airflow/providers/weaviate/CHANGELOG.rst  |  9 +++++++
 .../providers/weaviate/operators/weaviate.py  | 20 +++-----------
 .../tests/weaviate/operators/test_weaviate.py | 27 -------------------
 3 files changed, 12 insertions(+), 44 deletions(-)

diff --git a/providers/src/airflow/providers/weaviate/CHANGELOG.rst b/providers/src/airflow/providers/weaviate/CHANGELOG.rst
index 3565b781d22d0..c40d603d92424 100644
--- a/providers/src/airflow/providers/weaviate/CHANGELOG.rst
+++ b/providers/src/airflow/providers/weaviate/CHANGELOG.rst
@@ -20,6 +20,15 @@
 Changelog
 ---------
 
+main
+....
+
+.. warning::
+    All deprecated classes, parameters and features have been removed from the Weaviate provider package.
+    The following breaking changes were introduced:
+
+    * Removed deprecated ``input_json`` parameter from ``WeaviateIngestOperator``. Use ``input_data`` instead.
+
 2.1.0
 .....
 
diff --git a/providers/src/airflow/providers/weaviate/operators/weaviate.py b/providers/src/airflow/providers/weaviate/operators/weaviate.py index 22631f7c40c05..dd97dc3c66bc0 100644 --- a/providers/src/airflow/providers/weaviate/operators/weaviate.py +++ b/providers/src/airflow/providers/weaviate/operators/weaviate.py @@ -17,12 +17,10 @@ from __future__ import annotations -import warnings from collections.abc import Sequence from functools import cached_property from typing import TYPE_CHECKING, Any -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import BaseOperator from airflow.providers.weaviate.hooks.weaviate import WeaviateHook @@ -51,11 +49,9 @@ class WeaviateIngestOperator(BaseOperator): :param vector_col: key/column name in which the vectors are stored. :param hook_params: Optional config params to be passed to the underlying hook. Should match the desired hook constructor params. - :param input_json: (Deprecated) The JSON representing Weaviate data objects to generate embeddings on - (or provides custom vectors) and store them in the Weaviate class. """ - template_fields: Sequence[str] = ("input_json", "input_data") + template_fields: Sequence[str] = ("input_data",) def __init__( self, @@ -66,29 +62,19 @@ def __init__( uuid_column: str = "id", tenant: str | None = None, hook_params: dict | None = None, - input_json: list[dict[str, Any]] | pd.DataFrame | None = None, **kwargs: Any, ) -> None: super().__init__(**kwargs) self.collection_name = collection_name self.conn_id = conn_id self.vector_col = vector_col - self.input_json = input_json self.uuid_column = uuid_column self.tenant = tenant self.input_data = input_data self.hook_params = hook_params or {} - if (self.input_data is None) and (input_json is not None): - warnings.warn( - "Passing 'input_json' to WeaviateIngestOperator is deprecated and" - " you should use 'input_data' instead", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - self.input_data = input_json - elif self.input_data is None and input_json is None: - raise TypeError("Either input_json or input_data is required") + if self.input_data is None: + raise TypeError("input_data is required") @cached_property def hook(self) -> WeaviateHook: diff --git a/providers/tests/weaviate/operators/test_weaviate.py b/providers/tests/weaviate/operators/test_weaviate.py index 8060fdf023116..a90c1c538058d 100644 --- a/providers/tests/weaviate/operators/test_weaviate.py +++ b/providers/tests/weaviate/operators/test_weaviate.py @@ -20,7 +20,6 @@ import pytest -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.utils.task_instance_session import set_current_task_instance_session pytest.importorskip("weaviate") @@ -47,30 +46,6 @@ def test_constructor(self, operator): assert operator.input_data == [{"data": "sample_data"}] assert operator.hook_params == {} - @patch("airflow.providers.weaviate.operators.weaviate.WeaviateIngestOperator.log") - def test_execute_with_input_json(self, mock_log, operator): - with pytest.warns( - AirflowProviderDeprecationWarning, - match="Passing 'input_json' to WeaviateIngestOperator is deprecated and you should use 'input_data' instead", - ): - operator = WeaviateIngestOperator( - task_id="weaviate_task", - conn_id="weaviate_conn", - collection_name="my_collection", - input_json=[{"data": "sample_data"}], - ) - operator.hook.batch_data = MagicMock() - - operator.execute(context=None) - - operator.hook.batch_data.assert_called_once_with( - collection_name="my_collection", - 
data=[{"data": "sample_data"}], - vector_col="Vector", - uuid_col="id", - ) - mock_log.debug.assert_called_once_with("Input data: %s", [{"data": "sample_data"}]) - @patch("airflow.providers.weaviate.operators.weaviate.WeaviateIngestOperator.log") def test_execute_with_input_data(self, mock_log, operator): operator.hook.batch_data = MagicMock() @@ -94,12 +69,10 @@ def test_templates(self, create_task_instance_of_operator): task_id="task-id", conn_id="weaviate_conn", collection_name="my_collection", - input_json="{{ dag.dag_id }}", input_data="{{ dag.dag_id }}", ) ti.render_templates() - assert dag_id == ti.task.input_json assert dag_id == ti.task.input_data @pytest.mark.db_test From ad7a3dd5e11830e8acb4e05d93553d9ae4a5213f Mon Sep 17 00:00:00 2001 From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com> Date: Sat, 7 Dec 2024 13:42:40 +0530 Subject: [PATCH 11/25] Remove deprecations from SFTP Provider (#44740) * remove deprecations * add changelog * update provider name in changelog --- .../src/airflow/providers/sftp/CHANGELOG.rst | 14 ++ .../src/airflow/providers/sftp/hooks/sftp.py | 37 +---- .../airflow/providers/sftp/operators/sftp.py | 43 +----- providers/tests/sftp/hooks/test_sftp.py | 41 +---- providers/tests/sftp/operators/test_sftp.py | 141 +++--------------- 5 files changed, 47 insertions(+), 229 deletions(-) diff --git a/providers/src/airflow/providers/sftp/CHANGELOG.rst b/providers/src/airflow/providers/sftp/CHANGELOG.rst index 5e3ad50b1aedb..2313623f247ed 100644 --- a/providers/src/airflow/providers/sftp/CHANGELOG.rst +++ b/providers/src/airflow/providers/sftp/CHANGELOG.rst @@ -27,6 +27,20 @@ Changelog --------- +main +.... + +.. warning:: + All deprecated classes, parameters and features have been removed from the sftp provider package. + The following breaking changes were introduced: + + * Removed deprecated ``ssh_hook`` parameter from ``SFTPOperator``. Use ``sftp_hook`` instead. + * Removed deprecated ``ssh_hook`` parameter from ``SFTPHook``. + * Removed deprecated ``ftp_conn_id`` parameter from ``SFTPHook``. Use ``ssh_conn_id`` instead. + + + + 4.11.1 ...... diff --git a/providers/src/airflow/providers/sftp/hooks/sftp.py b/providers/src/airflow/providers/sftp/hooks/sftp.py index f4eb0fedace28..e4c88aa32e13a 100644 --- a/providers/src/airflow/providers/sftp/hooks/sftp.py +++ b/providers/src/airflow/providers/sftp/hooks/sftp.py @@ -22,7 +22,6 @@ import datetime import os import stat -import warnings from collections.abc import Sequence from fnmatch import fnmatch from typing import TYPE_CHECKING, Any, Callable @@ -30,7 +29,7 @@ import asyncssh from asgiref.sync import sync_to_async -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook from airflow.providers.ssh.hooks.ssh import SSHHook @@ -62,7 +61,6 @@ class SFTPHook(SSHHook): For consistency reasons with SSHHook, the preferred parameter is "ssh_conn_id". 
:param ssh_conn_id: The :ref:`sftp connection id` - :param ssh_hook: Optional SSH hook (included to support passing of an SSH hook to the SFTP operator) """ conn_name_attr = "ssh_conn_id" @@ -82,39 +80,12 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]: def __init__( self, ssh_conn_id: str | None = "sftp_default", - ssh_hook: SSHHook | None = None, host_proxy_cmd: str | None = None, *args, **kwargs, ) -> None: self.conn: paramiko.SFTPClient | None = None - # TODO: remove support for ssh_hook when it is removed from SFTPOperator - self.ssh_hook = ssh_hook - - if self.ssh_hook is not None: - warnings.warn( - "Parameter `ssh_hook` is deprecated and will be removed in a future version.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - if not isinstance(self.ssh_hook, SSHHook): - raise AirflowException( - f"ssh_hook must be an instance of SSHHook, but got {type(self.ssh_hook)}" - ) - self.log.info("ssh_hook is provided. It will be used to generate SFTP connection.") - self.ssh_conn_id = self.ssh_hook.ssh_conn_id - return - - ftp_conn_id = kwargs.pop("ftp_conn_id", None) - if ftp_conn_id: - warnings.warn( - "Parameter `ftp_conn_id` is deprecated. Please use `ssh_conn_id` instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - ssh_conn_id = ftp_conn_id - kwargs["ssh_conn_id"] = ssh_conn_id kwargs["host_proxy_cmd"] = host_proxy_cmd self.ssh_conn_id = ssh_conn_id @@ -124,11 +95,7 @@ def __init__( def get_conn(self) -> paramiko.SFTPClient: # type: ignore[override] """Open an SFTP connection to the remote host.""" if self.conn is None: - # TODO: remove support for ssh_hook when it is removed from SFTPOperator - if self.ssh_hook is not None: - self.conn = self.ssh_hook.get_conn().open_sftp() - else: - self.conn = super().get_conn().open_sftp() + self.conn = super().get_conn().open_sftp() return self.conn def close_conn(self) -> None: diff --git a/providers/src/airflow/providers/sftp/operators/sftp.py b/providers/src/airflow/providers/sftp/operators/sftp.py index 95a2880ae2eac..8538f874585f1 100644 --- a/providers/src/airflow/providers/sftp/operators/sftp.py +++ b/providers/src/airflow/providers/sftp/operators/sftp.py @@ -21,17 +21,15 @@ import os import socket -import warnings from collections.abc import Sequence from pathlib import Path from typing import Any import paramiko -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import BaseOperator from airflow.providers.sftp.hooks.sftp import SFTPHook -from airflow.providers.ssh.hooks.ssh import SSHHook class SFTPOperation: @@ -48,15 +46,12 @@ class SFTPOperator(BaseOperator): This operator uses sftp_hook to open sftp transport channel that serve as basis for file transfer. :param ssh_conn_id: :ref:`ssh connection id` - from airflow Connections. `ssh_conn_id` will be ignored if `ssh_hook` - or `sftp_hook` is provided. + from airflow Connections. :param sftp_hook: predefined SFTPHook to use Either `sftp_hook` or `ssh_conn_id` needs to be provided. - :param ssh_hook: Deprecated - predefined SSHHook to use for remote execution - Use `sftp_hook` instead. :param remote_host: remote host to connect (templated) Nullable. If provided, it will replace the `remote_host` which was - defined in `sftp_hook`/`ssh_hook` or predefined in the connection of `ssh_conn_id`. + defined in `sftp_hook` or predefined in the connection of `ssh_conn_id`. :param local_filepath: local file path or list of local file paths to get or put. 
(templated) :param remote_filepath: remote file path or list of remote file paths to get or put. (templated) :param operation: specify operation 'get' or 'put', defaults to put @@ -86,7 +81,6 @@ class SFTPOperator(BaseOperator): def __init__( self, *, - ssh_hook: SSHHook | None = None, sftp_hook: SFTPHook | None = None, ssh_conn_id: str | None = None, remote_host: str | None = None, @@ -98,7 +92,6 @@ def __init__( **kwargs, ) -> None: super().__init__(**kwargs) - self.ssh_hook = ssh_hook self.sftp_hook = sftp_hook self.ssh_conn_id = ssh_conn_id self.remote_host = remote_host @@ -131,35 +124,13 @@ def execute(self, context: Any) -> str | list[str] | None: f"expected {SFTPOperation.GET} or {SFTPOperation.PUT}." ) - # TODO: remove support for ssh_hook in next major provider version in hook and operator - if self.ssh_hook is not None and self.sftp_hook is not None: - raise AirflowException( - "Both `ssh_hook` and `sftp_hook` are defined. Please use only one of them." - ) - - if self.ssh_hook is not None: - if not isinstance(self.ssh_hook, SSHHook): - self.log.info("ssh_hook is invalid. Trying ssh_conn_id to create SFTPHook.") - self.sftp_hook = SFTPHook(ssh_conn_id=self.ssh_conn_id) - if self.sftp_hook is None: - warnings.warn( - "Parameter `ssh_hook` is deprecated. " - "Please use `sftp_hook` instead. " - "The old parameter `ssh_hook` will be removed in a future version.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - self.sftp_hook = SFTPHook(ssh_hook=self.ssh_hook) - file_msg = None try: if self.ssh_conn_id: if self.sftp_hook and isinstance(self.sftp_hook, SFTPHook): - self.log.info("ssh_conn_id is ignored when sftp_hook/ssh_hook is provided.") + self.log.info("ssh_conn_id is ignored when sftp_hook is provided.") else: - self.log.info( - "sftp_hook/ssh_hook not provided or invalid. Trying ssh_conn_id to create SFTPHook." - ) + self.log.info("sftp_hook not provided or invalid. 
Trying ssh_conn_id to create SFTPHook.") self.sftp_hook = SFTPHook(ssh_conn_id=self.ssh_conn_id) if not self.sftp_hook: @@ -217,7 +188,7 @@ def get_openlineage_facets_on_start(self): exc_info=True, ) - hook = self.sftp_hook or self.ssh_hook or SFTPHook(ssh_conn_id=self.ssh_conn_id) + hook = self.sftp_hook or SFTPHook(ssh_conn_id=self.ssh_conn_id) if self.remote_host is not None: remote_host = self.remote_host @@ -235,8 +206,6 @@ def get_openlineage_facets_on_start(self): if hasattr(hook, "port"): remote_port = hook.port - elif hasattr(hook, "ssh_hook"): - remote_port = hook.ssh_hook.port # Since v4.1.0, SFTPOperator accepts both a string (single file) and a list of # strings (multiple files) as local_filepath and remote_filepath, and internally diff --git a/providers/tests/sftp/hooks/test_sftp.py b/providers/tests/sftp/hooks/test_sftp.py index b9c1c33683ca7..5979ed0b80f09 100644 --- a/providers/tests/sftp/hooks/test_sftp.py +++ b/providers/tests/sftp/hooks/test_sftp.py @@ -30,10 +30,9 @@ from asyncssh import SFTPAttrs, SFTPNoSuchFile from asyncssh.sftp import SFTPName -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.sftp.hooks.sftp import SFTPHook, SFTPHookAsync -from airflow.providers.ssh.hooks.ssh import SSHHook from airflow.utils.session import provide_session pytestmark = pytest.mark.db_test @@ -389,44 +388,6 @@ def test_connection_success(self, mock_get_connection): assert status is True assert msg == "Connection successfully tested" - @mock.patch("airflow.providers.sftp.hooks.sftp.SFTPHook.get_connection") - def test_deprecation_ftp_conn_id(self, mock_get_connection): - connection = Connection(conn_id="ftp_default", login="login", host="host") - mock_get_connection.return_value = connection - # If `ftp_conn_id` is provided, it will be used but would show a deprecation warning. - with pytest.warns(AirflowProviderDeprecationWarning, match=r"Parameter `ftp_conn_id` is deprecated"): - assert SFTPHook(ftp_conn_id="ftp_default").ssh_conn_id == "ftp_default" - - # If both are provided, ftp_conn_id will be used but would show a deprecation warning. 
- with pytest.warns(AirflowProviderDeprecationWarning, match=r"Parameter `ftp_conn_id` is deprecated"): - assert ( - SFTPHook(ftp_conn_id="ftp_default", ssh_conn_id="sftp_default").ssh_conn_id == "ftp_default" - ) - - # If `ssh_conn_id` is provided, it should use it for ssh_conn_id - assert SFTPHook(ssh_conn_id="sftp_default").ssh_conn_id == "sftp_default" - # Default is 'sftp_default - assert SFTPHook().ssh_conn_id == "sftp_default" - - @mock.patch("airflow.providers.sftp.hooks.sftp.SFTPHook.get_connection") - def test_invalid_ssh_hook(self, mock_get_connection): - connection = Connection(conn_id="sftp_default", login="root", host="localhost") - mock_get_connection.return_value = connection - with ( - pytest.raises(AirflowException, match="ssh_hook must be an instance of SSHHook"), - pytest.warns(AirflowProviderDeprecationWarning, match=r"Parameter `ssh_hook` is deprecated.*"), - ): - SFTPHook(ssh_hook="invalid_hook") - - @mock.patch("airflow.providers.ssh.hooks.ssh.SSHHook.get_connection") - def test_valid_ssh_hook(self, mock_get_connection): - connection = Connection(conn_id="sftp_test", login="root", host="localhost") - mock_get_connection.return_value = connection - with pytest.warns(AirflowProviderDeprecationWarning, match=r"Parameter `ssh_hook` is deprecated.*"): - hook = SFTPHook(ssh_hook=SSHHook(ssh_conn_id="sftp_test")) - assert hook.ssh_conn_id == "sftp_test" - assert isinstance(hook.get_conn(), paramiko.SFTPClient) - def test_get_suffix_pattern_match(self): output = self.hook.get_file_by_pattern(self.temp_dir, "*.txt") # In CI files might have different name, so we check that file found rather than actual name diff --git a/providers/tests/sftp/operators/test_sftp.py b/providers/tests/sftp/operators/test_sftp.py index 2bd4be3d269e6..86965e0ed1913 100644 --- a/providers/tests/sftp/operators/test_sftp.py +++ b/providers/tests/sftp/operators/test_sftp.py @@ -26,7 +26,7 @@ import paramiko import pytest -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import DAG, Connection from airflow.providers.common.compat.openlineage.facet import Dataset from airflow.providers.sftp.hooks.sftp import SFTPHook @@ -110,7 +110,7 @@ def test_pickle_file_transfer_put(self, dag_maker): with dag_maker(dag_id="unit_tests_sftp_op_pickle_file_transfer_put", start_date=DEFAULT_DATE): SFTPOperator( # Put test file to remote. 
task_id="put_test_task", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, @@ -124,9 +124,8 @@ def test_pickle_file_transfer_put(self, dag_maker): ) tis = {ti.task_id: ti for ti in dag_maker.create_dagrun().task_instances} - with pytest.warns(AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*"): - tis["put_test_task"].run() - tis["check_file_task"].run() + tis["put_test_task"].run() + tis["check_file_task"].run() pulled = tis["check_file_task"].xcom_pull(task_ids="check_file_task", key="return_value") assert pulled.strip() == test_local_file_content @@ -148,7 +147,7 @@ def test_file_transfer_no_intermediate_dir_error_put(self, create_task_instance_ SFTPOperator, dag_id="unit_tests_sftp_op_file_transfer_no_intermediate_dir_error_put", task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath_int_dir, operation=SFTPOperation.PUT, @@ -156,7 +155,6 @@ def test_file_transfer_no_intermediate_dir_error_put(self, create_task_instance_ ) with ( pytest.raises(AirflowException) as ctx, - pytest.warns(AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*"), ): ti2.run() assert "No such file" in str(ctx.value) @@ -175,7 +173,7 @@ def test_file_transfer_with_intermediate_dir_put(self, dag_maker): with dag_maker(dag_id="unit_tests_sftp_op_file_transfer_with_intermediate_dir_put"): SFTPOperator( # Put test file to remote. task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath_int_dir, operation=SFTPOperation.PUT, @@ -189,10 +187,8 @@ def test_file_transfer_with_intermediate_dir_put(self, dag_maker): ) dagrun = dag_maker.create_dagrun(logical_date=timezone.utcnow()) tis = {ti.task_id: ti for ti in dagrun.task_instances} - with pytest.warns(AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*"): - tis["test_sftp"].run() - tis["test_check_file"].run() - + tis["test_sftp"].run() + tis["test_check_file"].run() pulled = tis["test_check_file"].xcom_pull(task_ids="test_check_file", key="return_value") assert pulled.strip() == test_local_file_content @@ -209,7 +205,7 @@ def test_json_file_transfer_put(self, dag_maker): with dag_maker(dag_id="unit_tests_sftp_op_json_file_transfer_put"): SFTPOperator( # Put test file to remote. task_id="put_test_task", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, @@ -222,9 +218,9 @@ def test_json_file_transfer_put(self, dag_maker): ) dagrun = dag_maker.create_dagrun(logical_date=timezone.utcnow()) tis = {ti.task_id: ti for ti in dagrun.task_instances} - with pytest.warns(AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*"): - tis["put_test_task"].run() - tis["check_file_task"].run() + + tis["put_test_task"].run() + tis["check_file_task"].run() pulled = tis["check_file_task"].xcom_pull(task_ids="check_file_task", key="return_value") assert pulled.strip() == b64encode(test_local_file_content).decode("utf-8") @@ -242,16 +238,13 @@ def test_pickle_file_transfer_get(self, dag_maker, create_remote_file_and_cleanu with dag_maker(dag_id="unit_tests_sftp_op_pickle_file_transfer_get"): SFTPOperator( # Get remote file to local. 
task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, ) for ti in dag_maker.create_dagrun(logical_date=timezone.utcnow()).task_instances: - with pytest.warns( - AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*" - ): - ti.run() + ti.run() # Test the received content. with open(self.test_local_filepath, "rb") as file: @@ -263,16 +256,13 @@ def test_json_file_transfer_get(self, dag_maker, create_remote_file_and_cleanup) with dag_maker(dag_id="unit_tests_sftp_op_json_file_transfer_get"): SFTPOperator( # Get remote file to local. task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, ) for ti in dag_maker.create_dagrun(logical_date=timezone.utcnow()).task_instances: - with pytest.warns( - AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*" - ): - ti.run() + ti.run() # Test the received content. content_received = None @@ -286,7 +276,7 @@ def test_file_transfer_no_intermediate_dir_error_get(self, dag_maker, create_rem with dag_maker(dag_id="unit_tests_sftp_op_file_transfer_no_intermediate_dir_error_get"): SFTPOperator( # Try to GET test file from remote. task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, @@ -297,9 +287,6 @@ def test_file_transfer_no_intermediate_dir_error_get(self, dag_maker, create_rem # does not exist. with ( pytest.raises(AirflowException) as ctx, - pytest.warns( - AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*" - ), ): ti.run() assert "No such file" in str(ctx.value) @@ -310,7 +297,7 @@ def test_file_transfer_with_intermediate_dir_error_get(self, dag_maker, create_r with dag_maker(dag_id="unit_tests_sftp_op_file_transfer_with_intermediate_dir_error_get"): SFTPOperator( # Get remote file to local. task_id="test_sftp", - ssh_hook=self.hook, + sftp_hook=self.sftp_hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, @@ -318,10 +305,7 @@ def test_file_transfer_with_intermediate_dir_error_get(self, dag_maker, create_r ) for ti in dag_maker.create_dagrun(logical_date=timezone.utcnow()).task_instances: - with pytest.warns( - AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*" - ): - ti.run() + ti.run() # Test the received content. 
content_received = None @@ -336,7 +320,7 @@ def test_arg_checking(self): schedule=None, default_args={"start_date": DEFAULT_DATE}, ) - # Exception should be raised if neither ssh_hook nor ssh_conn_id is provided + # Exception should be raised if ssh_conn_id is not provided task_0 = SFTPOperator( task_id="test_sftp_0", local_filepath=self.test_local_filepath, @@ -347,10 +331,9 @@ def test_arg_checking(self): with pytest.raises(AirflowException, match="Cannot operate without sftp_hook or ssh_conn_id."): task_0.execute(None) - # if ssh_hook is invalid/not provided, use ssh_conn_id to create SSHHook + # use ssh_conn_id to create SSHHook task_1 = SFTPOperator( task_id="test_sftp_1", - ssh_hook="string_rather_than_SSHHook", # type: ignore ssh_conn_id=TEST_CONN_ID, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, @@ -363,7 +346,7 @@ def test_arg_checking(self): task_2 = SFTPOperator( task_id="test_sftp_2", - ssh_conn_id=TEST_CONN_ID, # no ssh_hook provided + ssh_conn_id=TEST_CONN_ID, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, @@ -373,53 +356,8 @@ def test_arg_checking(self): task_2.execute(None) assert task_2.sftp_hook.ssh_conn_id == TEST_CONN_ID - # if both valid ssh_hook and ssh_conn_id are provided, ignore ssh_conn_id task_3 = SFTPOperator( task_id="test_sftp_3", - ssh_hook=self.hook, - ssh_conn_id=TEST_CONN_ID, - local_filepath=self.test_local_filepath, - remote_filepath=self.test_remote_filepath, - operation=SFTPOperation.PUT, - dag=dag, - ) - with ( - contextlib.suppress(Exception), - pytest.warns(AirflowProviderDeprecationWarning, match="Parameter `ssh_hook` is deprecated..*"), - ): - task_3.execute(None) - assert task_3.sftp_hook.ssh_conn_id == self.hook.ssh_conn_id - - task_4 = SFTPOperator( - task_id="test_sftp_4", - local_filepath=self.test_local_filepath, - remote_filepath=self.test_remote_filepath, - operation="invalid_operation", - dag=dag, - ) - # Exception should be raised if operation is invalid - with pytest.raises(TypeError, match="Unsupported operation value invalid_operation, "): - task_4.execute(None) - - task_5 = SFTPOperator( - task_id="test_sftp_5", - ssh_hook=self.hook, - sftp_hook=SFTPHook(), - local_filepath=self.test_local_filepath, - remote_filepath=self.test_remote_filepath, - operation=SFTPOperation.PUT, - dag=dag, - ) - - # Exception should be raised if both ssh_hook and sftp_hook are provided - with pytest.raises( - AirflowException, - match="Both `ssh_hook` and `sftp_hook` are defined. 
Please use only one of them.", - ): - task_5.execute(None) - - task_6 = SFTPOperator( - task_id="test_sftp_6", ssh_conn_id=TEST_CONN_ID, remote_host="remotehost", local_filepath=self.test_local_filepath, @@ -428,8 +366,8 @@ def test_arg_checking(self): dag=dag, ) with contextlib.suppress(Exception): - task_6.execute(None) - assert task_6.sftp_hook.remote_host == "remotehost" + task_3.execute(None) + assert task_3.sftp_hook.remote_host == "remotehost" def test_unequal_local_remote_file_paths(self): with pytest.raises(ValueError): @@ -581,34 +519,3 @@ def test_extract_sftp_hook(self, get_connection, get_conn, operation, expected): assert lineage.inputs == expected[0] assert lineage.outputs == expected[1] - - @pytest.mark.parametrize( - "operation, expected", - TEST_GET_PUT_PARAMS, - ) - @mock.patch("airflow.providers.ssh.hooks.ssh.SSHHook.get_conn", spec=paramiko.SSHClient) - @mock.patch("airflow.providers.ssh.hooks.ssh.SSHHook.get_connection", spec=Connection) - def test_extract_ssh_hook(self, get_connection, get_conn, operation, expected): - get_connection.return_value = Connection( - conn_id="sftp_conn_id", - conn_type="sftp", - host="remotehost", - port=22, - ) - - dag_id = "sftp_dag" - task_id = "sftp_task" - - task = SFTPOperator( - task_id=task_id, - ssh_hook=SSHHook(ssh_conn_id="sftp_conn_id"), - dag=DAG(dag_id, schedule=None), - start_date=timezone.utcnow(), - local_filepath="/path/local", - remote_filepath="/path/remote", - operation=operation, - ) - lineage = task.get_openlineage_facets_on_start() - - assert lineage.inputs == expected[0] - assert lineage.outputs == expected[1] From 909ff713a47d6217592f919cddbbb6967e0fddf0 Mon Sep 17 00:00:00 2001 From: "D. Ferruzzi" Date: Sat, 7 Dec 2024 01:10:19 -0800 Subject: [PATCH 12/25] Random doc typos (#44750) * Random doc typos * Update contributing-docs/testing/unit_tests.rst Co-authored-by: Shahar Epstein <60007259+shahar1@users.noreply.github.com> * Update contributing-docs/testing/unit_tests.rst --------- Co-authored-by: Shahar Epstein <60007259+shahar1@users.noreply.github.com> --- contributing-docs/testing/unit_tests.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/contributing-docs/testing/unit_tests.rst b/contributing-docs/testing/unit_tests.rst index 6c39b0780edac..12a2403b9a783 100644 --- a/contributing-docs/testing/unit_tests.rst +++ b/contributing-docs/testing/unit_tests.rst @@ -338,10 +338,10 @@ If your test accesses the database but is not marked properly the Non-DB test in How to verify if DB test is correctly classified ................................................ -When you add if you want to see if your DB test is correctly classified, you can run the test or group +If you want to see if your DB test is correctly classified, you can run the test or group of tests with ``--skip-db-tests`` flag. -You can run the all (or subset of) test types if you want to make sure all ot the problems are fixed +You can run the all (or subset of) test types if you want to make sure all of the problems are fixed .. 
code-block:: bash @@ -458,8 +458,8 @@ Do this: Problems with Non-DB test collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sometimes, even if whole module is marked as ``@pytest.mark.db_test`` even parsing the file and collecting -tests will fail when ``--skip-db-tests`` is used because some of the imports od objects created in the +Sometimes, even if the whole module is marked as ``@pytest.mark.db_test``, parsing the file and collecting +tests will fail when ``--skip-db-tests`` is used because some of the imports or objects created in the module will read the database. Usually what helps is to move such initialization code to inside the tests or pytest fixtures (and pass @@ -1086,9 +1086,9 @@ directly to the container. Implementing compatibility for provider tests for older Airflow versions ........................................................................ -When you implement tests for providers, you should make sure that they are compatible with older +When you implement tests for providers, you should make sure that they are compatible with older Airflow versions. -Note that some of the tests if written without taking care about the compatibility, might not work with older +Note that some of the tests, if written without taking care about the compatibility, might not work with older versions of Airflow - this is because of refactorings, renames, and tests relying on internals of Airflow that are not part of the public API. We deal with it in one of the following ways: From c73becd1a7b5b38a5b0dfe0cb9d02b076bfe6f32 Mon Sep 17 00:00:00 2001 From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com> Date: Sat, 7 Dec 2024 10:13:10 +0100 Subject: [PATCH 13/25] Remove Provider Deprecations in Sqlite (#44707) * Remove Provider Deprecations in Sqlite * Adjust docs after deprecation --- .../operators.rst | 4 +- .../airflow/providers/sqlite/CHANGELOG.rst | 13 +++ .../providers/sqlite/operators/__init__.py | 17 ---- .../providers/sqlite/operators/sqlite.py | 59 ------------- .../airflow/providers/sqlite/provider.yaml | 6 -- providers/tests/sqlite/operators/__init__.py | 17 ---- .../tests/sqlite/operators/test_sqlite.py | 85 ------------------- 7 files changed, 15 insertions(+), 186 deletions(-) delete mode 100644 providers/src/airflow/providers/sqlite/operators/__init__.py delete mode 100644 providers/src/airflow/providers/sqlite/operators/sqlite.py delete mode 100644 providers/tests/sqlite/operators/__init__.py delete mode 100644 providers/tests/sqlite/operators/test_sqlite.py diff --git a/docs/apache-airflow-providers-sqlite/operators.rst b/docs/apache-airflow-providers-sqlite/operators.rst index 5b113162ae4c3..086192325cdc8 100644 --- a/docs/apache-airflow-providers-sqlite/operators.rst +++ b/docs/apache-airflow-providers-sqlite/operators.rst @@ -25,8 +25,8 @@ SQLExecuteQueryOperator to connect to Sqlite Use the :class:`SQLExecuteQueryOperator` to execute Sqlite commands in a `Sqlite `__ database. -.. warning:: - Previously, SqliteOperator was used to perform this kind of operation. But at the moment SqliteOperator is deprecated and will be removed in future versions of the provider. Please consider to switch to SQLExecuteQueryOperator as soon as possible. +.. note:: + Previously, ``SqliteOperator`` was used to perform this kind of operation. After deprecation this has been removed. Please use ``SQLExecuteQueryOperator`` instead. 
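As a quick illustration of the replacement (a minimal sketch, assuming the stock ``sqlite_default`` connection; the task id and SQL are illustrative):

.. code-block:: python

    from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

    create_table = SQLExecuteQueryOperator(
        task_id="create_table",
        conn_id="sqlite_default",  # any SQLite connection id works here
        sql="CREATE TABLE IF NOT EXISTS test_airflow (dummy VARCHAR(50));",
    )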
Using the Operator ^^^^^^^^^^^^^^^^^^ diff --git a/providers/src/airflow/providers/sqlite/CHANGELOG.rst b/providers/src/airflow/providers/sqlite/CHANGELOG.rst index d69e941316447..33727da0ab1b7 100644 --- a/providers/src/airflow/providers/sqlite/CHANGELOG.rst +++ b/providers/src/airflow/providers/sqlite/CHANGELOG.rst @@ -27,6 +27,19 @@ Changelog --------- +main +.... + +Breaking changes +~~~~~~~~~~~~~~~~ + +.. warning:: + All deprecated classes, parameters and features have been removed from the Sqlite provider package. + The following breaking changes were introduced: + + * Operators + * Remove ``airflow.providers.sqlite.operators.sqlite.SqliteOperator``. Please use ``airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator``. + 3.9.1 ..... diff --git a/providers/src/airflow/providers/sqlite/operators/__init__.py b/providers/src/airflow/providers/sqlite/operators/__init__.py deleted file mode 100644 index 217e5db960782..0000000000000 --- a/providers/src/airflow/providers/sqlite/operators/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/src/airflow/providers/sqlite/operators/sqlite.py b/providers/src/airflow/providers/sqlite/operators/sqlite.py deleted file mode 100644 index 25df70c8abf72..0000000000000 --- a/providers/src/airflow/providers/sqlite/operators/sqlite.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from collections.abc import Sequence -from typing import ClassVar - -from deprecated import deprecated - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator - - -@deprecated( - reason="Please use `airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`.", - category=AirflowProviderDeprecationWarning, -) -class SqliteOperator(SQLExecuteQueryOperator): - """ - Executes sql code in a specific Sqlite database. - - This class is deprecated. 
- - Please use :class:`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:SqliteOperator` - - :param sql: the sql code to be executed. Can receive a str representing a - sql statement, a list of str (sql statements), or reference to a template file. - Template reference are recognized by str ending in '.sql' - (templated) - :param sqlite_conn_id: reference to a specific sqlite database - :param parameters: (optional) the parameters to render the SQL query with. - """ - - template_fields: Sequence[str] = ("sql",) - template_ext: Sequence[str] = (".sql",) - template_fields_renderers: ClassVar[dict] = {"sql": "sql"} - ui_color = "#cdaaed" - - def __init__(self, *, sqlite_conn_id: str = "sqlite_default", **kwargs) -> None: - super().__init__(conn_id=sqlite_conn_id, **kwargs) diff --git a/providers/src/airflow/providers/sqlite/provider.yaml b/providers/src/airflow/providers/sqlite/provider.yaml index 473c211966e61..3ab2417cf2c06 100644 --- a/providers/src/airflow/providers/sqlite/provider.yaml +++ b/providers/src/airflow/providers/sqlite/provider.yaml @@ -68,12 +68,6 @@ integrations: logo: /integration-logos/sqlite/SQLite.png tags: [software] -operators: - - integration-name: SQLite - - python-modules: - - airflow.providers.sqlite.operators.sqlite - hooks: - integration-name: SQLite python-modules: diff --git a/providers/tests/sqlite/operators/__init__.py b/providers/tests/sqlite/operators/__init__.py deleted file mode 100644 index 217e5db960782..0000000000000 --- a/providers/tests/sqlite/operators/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/tests/sqlite/operators/test_sqlite.py b/providers/tests/sqlite/operators/test_sqlite.py deleted file mode 100644 index 7b95c5d932a58..0000000000000 --- a/providers/tests/sqlite/operators/test_sqlite.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import pytest - -from airflow.models.dag import DAG -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator -from airflow.utils import timezone - -DEFAULT_DATE = timezone.datetime(2015, 1, 1) -DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat() -DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10] -TEST_DAG_ID = "unit_test_dag" - - -@pytest.mark.backend("sqlite") -class TestSqliteOperator: - def setup_method(self): - args = {"owner": "airflow", "start_date": DEFAULT_DATE} - dag = DAG(TEST_DAG_ID, schedule=None, default_args=args) - self.dag = dag - - def teardown_method(self): - tables_to_drop = ["test_airflow", "test_airflow2"] - from airflow.providers.sqlite.hooks.sqlite import SqliteHook - - with SqliteHook().get_conn() as conn: - cur = conn.cursor() - for table in tables_to_drop: - cur.execute(f"DROP TABLE IF EXISTS {table}") - - def test_sqlite_operator_with_one_statement(self): - sql = """ - CREATE TABLE IF NOT EXISTS test_airflow ( - dummy VARCHAR(50) - ); - """ - op = SQLExecuteQueryOperator(task_id="basic_sqlite", sql=sql, dag=self.dag, conn_id="sqlite_default") - op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) - - def test_sqlite_operator_with_multiple_statements(self): - sql = [ - "CREATE TABLE IF NOT EXISTS test_airflow (dummy VARCHAR(50))", - "INSERT INTO test_airflow VALUES ('X')", - ] - op = SQLExecuteQueryOperator( - task_id="sqlite_operator_with_multiple_statements", - sql=sql, - dag=self.dag, - conn_id="sqlite_default", - ) - op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) - - def test_sqlite_operator_with_invalid_sql(self): - sql = [ - "CREATE TABLE IF NOT EXISTS test_airflow (dummy VARCHAR(50))", - "INSERT INTO test_airflow2 VALUES ('X')", - ] - - from sqlite3 import OperationalError - - op = SQLExecuteQueryOperator( - task_id="sqlite_operator_with_multiple_statements", - sql=sql, - dag=self.dag, - conn_id="sqlite_default", - ) - with pytest.raises(OperationalError, match="no such table: test_airflow2"): - op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True) From ee6167e1505d69f392fb20cce8c32ec3bd2a0f1a Mon Sep 17 00:00:00 2001 From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com> Date: Sat, 7 Dec 2024 16:03:07 +0530 Subject: [PATCH 14/25] remove deprecations (#44756) --- .../airflow/providers/snowflake/CHANGELOG.rst | 9 ++ .../providers/snowflake/hooks/snowflake.py | 3 - .../snowflake/operators/snowflake.py | 107 +----------------- 3 files changed, 12 insertions(+), 107 deletions(-) diff --git a/providers/src/airflow/providers/snowflake/CHANGELOG.rst b/providers/src/airflow/providers/snowflake/CHANGELOG.rst index 3eb83f39e75a1..a61a808e5992e 100644 --- a/providers/src/airflow/providers/snowflake/CHANGELOG.rst +++ b/providers/src/airflow/providers/snowflake/CHANGELOG.rst @@ -27,6 +27,15 @@ Changelog --------- +main +.... + +.. warning:: + All deprecated classes, parameters and features have been removed from the snowflake provider package. + The following breaking changes were introduced: + + * Removed deprecated ``SnowflakeOperator``. Use ``SQLExecuteQueryOperator`` instead. + 5.8.1 ..... 
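For users migrating off ``SnowflakeOperator``, a minimal sketch of the equivalent ``SQLExecuteQueryOperator`` call; the connection id, SQL, and ``hook_params`` values here are illustrative, while the ``hook_params`` keys mirror the old operator's dedicated arguments:

.. code-block:: python

    from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

    select_rows = SQLExecuteQueryOperator(
        task_id="select_rows",
        conn_id="snowflake_default",
        sql="SELECT * FROM my_table",
        # warehouse/database/role/schema used to be top-level SnowflakeOperator args;
        # they now travel to SnowflakeHook via hook_params.
        hook_params={"warehouse": "my_wh", "database": "my_db", "role": "my_role", "schema": "my_schema"},
    )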
diff --git a/providers/src/airflow/providers/snowflake/hooks/snowflake.py b/providers/src/airflow/providers/snowflake/hooks/snowflake.py index f9e40942bb374..5571ac8fc66c8 100644 --- a/providers/src/airflow/providers/snowflake/hooks/snowflake.py +++ b/providers/src/airflow/providers/snowflake/hooks/snowflake.py @@ -82,9 +82,6 @@ class SnowflakeHook(DbApiHook): .. note:: ``get_sqlalchemy_engine()`` depends on ``snowflake-sqlalchemy`` - .. seealso:: - For more information on how to use this Snowflake connection, take a look at the guide: - :ref:`howto/operator:SnowflakeOperator` """ conn_name_attr = "snowflake_conn_id" diff --git a/providers/src/airflow/providers/snowflake/operators/snowflake.py b/providers/src/airflow/providers/snowflake/operators/snowflake.py index 859e88d7d27f0..ffce0c56325a1 100644 --- a/providers/src/airflow/providers/snowflake/operators/snowflake.py +++ b/providers/src/airflow/providers/snowflake/operators/snowflake.py @@ -20,12 +20,10 @@ import time from collections.abc import Iterable, Mapping, Sequence from datetime import timedelta -from typing import TYPE_CHECKING, Any, ClassVar, SupportsAbs, cast - -from deprecated import deprecated +from typing import TYPE_CHECKING, Any, SupportsAbs, cast from airflow.configuration import conf -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.common.sql.operators.sql import ( SQLCheckOperator, SQLExecuteQueryOperator, @@ -39,105 +37,6 @@ from airflow.utils.context import Context -@deprecated( - reason=( - "This class is deprecated. Please use " - "`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. " - "Also, you can provide `hook_params={'warehouse': , 'database': , " - "'role': , 'schema': , 'authenticator': ," - "'session_parameters': }`." - ), - category=AirflowProviderDeprecationWarning, -) -class SnowflakeOperator(SQLExecuteQueryOperator): - """ - Executes SQL code in a Snowflake database. - - This class is deprecated. - - Please use :class:`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:SnowflakeOperator` - - :param snowflake_conn_id: Reference to - :ref:`Snowflake connection id` - :param sql: the SQL code to be executed as a single string, or - a list of str (sql statements), or a reference to a template file. - Template references are recognized by str ending in '.sql' - :param parameters: (optional) the parameters to render the SQL query with. - :param warehouse: name of warehouse (will overwrite any warehouse - defined in the connection's extra JSON) - :param database: name of database (will overwrite database defined - in connection) - :param schema: name of schema (will overwrite schema defined in - connection) - :param role: name of role (will overwrite any role defined in - connection's extra JSON) - :param authenticator: authenticator for Snowflake. - 'snowflake' (default) to use the internal Snowflake authenticator - 'externalbrowser' to authenticate using your web browser and - Okta, ADFS or any other SAML 2.0-compliant identify provider - (IdP) that has been defined for your account - 'https://.okta.com' to authenticate - through native Okta. - :param session_parameters: You can set session-level parameters at - the time you connect to Snowflake - :return Returns list of dictionaries in { 'column': 'value', 'column2': 'value2' } form. 
- """ - - template_fields: Sequence[str] = ("sql",) - template_ext: Sequence[str] = (".sql",) - template_fields_renderers: ClassVar[dict] = {"sql": "sql"} - ui_color = "#ededed" - - def __init__( - self, - *, - snowflake_conn_id: str = "snowflake_default", - warehouse: str | None = None, - database: str | None = None, - role: str | None = None, - schema: str | None = None, - authenticator: str | None = None, - session_parameters: dict | None = None, - **kwargs, - ) -> None: - if any([warehouse, database, role, schema, authenticator, session_parameters]): - hook_params = kwargs.pop("hook_params", {}) - kwargs["hook_params"] = { - "warehouse": warehouse, - "database": database, - "role": role, - "schema": schema, - "authenticator": authenticator, - "session_parameters": session_parameters, - **hook_params, - } - super().__init__(conn_id=snowflake_conn_id, **kwargs) - - def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]: - validated_descriptions: list[Sequence[Sequence]] = [] - for idx, description in enumerate(descriptions): - if not description: - raise RuntimeError( - f"The query did not return descriptions of the cursor for query number {idx}. " - "Cannot return values in a form of dictionary for that query." - ) - validated_descriptions.append(description) - returned_results = [] - for result_id, result_list in enumerate(results): - current_processed_result = [] - for row in result_list: - dict_result: dict[Any, Any] = {} - for idx, description in enumerate(validated_descriptions[result_id]): - dict_result[description[0]] = row[idx] - current_processed_result.append(dict_result) - returned_results.append(current_processed_result) - return returned_results - - class SnowflakeCheckOperator(SQLCheckOperator): """ Performs a check against Snowflake. @@ -392,7 +291,7 @@ def __init__( class SnowflakeSqlApiOperator(SQLExecuteQueryOperator): """ Implemented Snowflake SQL API Operator to support multiple SQL statements sequentially, - which is the behavior of the SnowflakeOperator, the Snowflake SQL API allows submitting + which is the behavior of the SQLExecuteQueryOperator, the Snowflake SQL API allows submitting multiple SQL statements in a single request. It make post request to submit SQL statements for execution, poll to check the status of the execution of a statement. Fetch query results concurrently. From dc4703a7aad41692c29c17384da1808a4a3064aa Mon Sep 17 00:00:00 2001 From: vatsrahul1001 <43964496+vatsrahul1001@users.noreply.github.com> Date: Sat, 7 Dec 2024 16:04:33 +0530 Subject: [PATCH 15/25] remove deprecations (#44757) --- .../airflow/providers/tableau/CHANGELOG.rst | 9 +++ .../providers/tableau/hooks/tableau.py | 27 +-------- providers/tests/tableau/hooks/test_tableau.py | 58 ++----------------- 3 files changed, 16 insertions(+), 78 deletions(-) diff --git a/providers/src/airflow/providers/tableau/CHANGELOG.rst b/providers/src/airflow/providers/tableau/CHANGELOG.rst index 8084568faeb4d..23b2f4ba529a8 100644 --- a/providers/src/airflow/providers/tableau/CHANGELOG.rst +++ b/providers/src/airflow/providers/tableau/CHANGELOG.rst @@ -27,6 +27,15 @@ Changelog --------- +main +.... + +.. warning:: + All deprecated classes, parameters and features have been removed from the tableau provider package. + The following breaking changes were introduced: + + * Removed deprecated Authentication via ``personal access token``. Use ``password`` authentication instead. + 4.6.1 ..... 
diff --git a/providers/src/airflow/providers/tableau/hooks/tableau.py b/providers/src/airflow/providers/tableau/hooks/tableau.py index 38455e56c407c..ce0ddb2b65e31 100644 --- a/providers/src/airflow/providers/tableau/hooks/tableau.py +++ b/providers/src/airflow/providers/tableau/hooks/tableau.py @@ -20,10 +20,9 @@ from enum import Enum from typing import TYPE_CHECKING, Any -from deprecated import deprecated -from tableauserverclient import Pager, PersonalAccessTokenAuth, Server, TableauAuth +from tableauserverclient import Pager, Server, TableauAuth -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook if TYPE_CHECKING: @@ -112,8 +111,6 @@ def get_conn(self) -> Auth.contextmgr: """ if self.conn.login and self.conn.password: return self._auth_via_password() - if "token_name" in self.conn.extra_dejson and "personal_access_token" in self.conn.extra_dejson: - return self._auth_via_token() raise NotImplementedError("No Authentication method found for given Credentials!") def _auth_via_password(self) -> Auth.contextmgr: @@ -122,26 +119,6 @@ def _auth_via_password(self) -> Auth.contextmgr: ) return self.server.auth.sign_in(tableau_auth) - @deprecated( - reason=( - "Authentication via personal access token is deprecated. " - "Please, use the password authentication to avoid inconsistencies." - ), - category=AirflowProviderDeprecationWarning, - ) - def _auth_via_token(self) -> Auth.contextmgr: - """ - Authenticate via personal access token. - - This method is deprecated. Please, use the authentication via password instead. - """ - tableau_auth = PersonalAccessTokenAuth( - token_name=self.conn.extra_dejson["token_name"], - personal_access_token=self.conn.extra_dejson["personal_access_token"], - site_id=self.site_id, - ) - return self.server.auth.sign_in_with_personal_access_token(tableau_auth) - def get_all(self, resource_name: str) -> Pager: """ Get all items of the given resource. 
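To show the hook in use after the change, a short sketch (the connection id and site are assumptions, and ``workbooks`` is one of the ``tableauserverclient`` resource names accepted by ``get_all``):

.. code-block:: python

    from airflow.providers.tableau.hooks.tableau import TableauHook

    # Signs in with the connection's login/password and pages through workbooks.
    with TableauHook(site_id="my_site", tableau_conn_id="tableau_default") as hook:
        for workbook in hook.get_all(resource_name="workbooks"):
            print(workbook.name)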
diff --git a/providers/tests/tableau/hooks/test_tableau.py b/providers/tests/tableau/hooks/test_tableau.py index 23d5e0188f609..241367b4f6989 100644 --- a/providers/tests/tableau/hooks/test_tableau.py +++ b/providers/tests/tableau/hooks/test_tableau.py @@ -21,7 +21,6 @@ import pytest from airflow import configuration, models -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.tableau.hooks.tableau import TableauHook, TableauJobFinishCode from airflow.utils import db @@ -46,14 +45,6 @@ def setup_method(self): extra='{"site_id": "my_site"}', ) ) - db.merge_conn( - models.Connection( - conn_id="tableau_test_token", - conn_type="tableau", - host="tableau", - extra='{"token_name": "my_token", "personal_access_token": "my_personal_access_token"}', - ) - ) db.merge_conn( models.Connection( conn_id="tableau_test_ssl_connection_certificates_path", @@ -84,14 +75,6 @@ def setup_method(self): extra='{"verify": false}', ) ) - db.merge_conn( - models.Connection( - conn_id="tableau_test_ssl_connection_default", - conn_type="tableau", - host="tableau", - extra='{"token_name": "my_token", "personal_access_token": "my_personal_access_token"}', - ) - ) @patch("airflow.providers.tableau.hooks.tableau.TableauAuth") @patch("airflow.providers.tableau.hooks.tableau.Server") @@ -109,30 +92,6 @@ def test_get_conn_auth_via_password_and_site_in_connection(self, mock_server, mo mock_server.return_value.auth.sign_in.assert_called_once_with(mock_tableau_auth.return_value) mock_server.return_value.auth.sign_out.assert_called_once_with() - @patch("airflow.providers.tableau.hooks.tableau.PersonalAccessTokenAuth") - @patch("airflow.providers.tableau.hooks.tableau.Server") - def test_get_conn_auth_via_token_and_site_in_init(self, mock_server, mock_tableau_auth): - """ - Test get conn auth via token - """ - with ( - pytest.warns( - AirflowProviderDeprecationWarning, - match="Authentication via personal access token is deprecated..*", - ), - TableauHook(site_id="test", tableau_conn_id="tableau_test_token") as tableau_hook, - ): - mock_server.assert_called_once_with(tableau_hook.conn.host) - mock_tableau_auth.assert_called_once_with( - token_name=tableau_hook.conn.extra_dejson["token_name"], - personal_access_token=tableau_hook.conn.extra_dejson["personal_access_token"], - site_id=tableau_hook.site_id, - ) - mock_server.return_value.auth.sign_in_with_personal_access_token.assert_called_once_with( - mock_tableau_auth.return_value - ) - mock_server.return_value.auth.sign_out.assert_called_once_with() - @patch("airflow.providers.tableau.hooks.tableau.TableauAuth") @patch("airflow.providers.tableau.hooks.tableau.Server") def test_get_conn_ssl_cert_path(self, mock_server, mock_tableau_auth): @@ -155,30 +114,23 @@ def test_get_conn_ssl_cert_path(self, mock_server, mock_tableau_auth): mock_server.return_value.auth.sign_in.assert_called_once_with(mock_tableau_auth.return_value) mock_server.return_value.auth.sign_out.assert_called_once_with() - @patch("airflow.providers.tableau.hooks.tableau.PersonalAccessTokenAuth") + @patch("airflow.providers.tableau.hooks.tableau.TableauAuth") @patch("airflow.providers.tableau.hooks.tableau.Server") def test_get_conn_ssl_default(self, mock_server, mock_tableau_auth): """ Test get conn with default SSL parameters """ with ( - pytest.warns( - AirflowProviderDeprecationWarning, - match="Authentication via personal access token is deprecated..*", - ), - TableauHook(tableau_conn_id="tableau_test_ssl_connection_default") as tableau_hook, + 
TableauHook(tableau_conn_id="tableau_test_password") as tableau_hook, ): mock_server.assert_called_once_with(tableau_hook.conn.host) mock_server.return_value.add_http_options.assert_called_once_with( options_dict={"verify": True, "cert": None} ) mock_tableau_auth.assert_called_once_with( - token_name=tableau_hook.conn.extra_dejson["token_name"], - personal_access_token=tableau_hook.conn.extra_dejson["personal_access_token"], - site_id="", - ) - mock_server.return_value.auth.sign_in_with_personal_access_token.assert_called_once_with( - mock_tableau_auth.return_value + username=tableau_hook.conn.login, + password=tableau_hook.conn.password, + site_id=tableau_hook.conn.extra_dejson["site_id"], ) mock_server.return_value.auth.sign_out.assert_called_once_with() From e26909df6af86cc18a272d993ad45ab17dfa333a Mon Sep 17 00:00:00 2001 From: Maciej Obuchowski Date: Sat, 7 Dec 2024 13:23:48 +0100 Subject: [PATCH 16/25] add clear_number to OpenLineage's dagrun-level event run id generation (#44617) Signed-off-by: Maciej Obuchowski --- .../providers/openlineage/plugins/adapter.py | 14 +++++----- .../providers/openlineage/plugins/listener.py | 6 +++++ .../tests/openlineage/plugins/test_adapter.py | 26 +++++++++++++------ .../openlineage/plugins/test_listener.py | 12 +++++---- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/providers/src/airflow/providers/openlineage/plugins/adapter.py b/providers/src/airflow/providers/openlineage/plugins/adapter.py index ec836b541a2a3..e64109911926c 100644 --- a/providers/src/airflow/providers/openlineage/plugins/adapter.py +++ b/providers/src/airflow/providers/openlineage/plugins/adapter.py @@ -115,11 +115,11 @@ def _read_yaml_config(path: str) -> dict | None: return yaml.safe_load(config_file) @staticmethod - def build_dag_run_id(dag_id: str, logical_date: datetime) -> str: + def build_dag_run_id(dag_id: str, logical_date: datetime, clear_number: int) -> str: return str( generate_static_uuid( instant=logical_date, - data=f"{conf.namespace()}.{dag_id}".encode(), + data=f"{conf.namespace()}.{dag_id}.{clear_number}".encode(), ) ) @@ -333,6 +333,7 @@ def dag_started( nominal_end_time: str, owners: list[str], run_facets: dict[str, RunFacet], + clear_number: int, description: str | None = None, job_facets: dict[str, JobFacet] | None = None, # Custom job facets ): @@ -349,8 +350,7 @@ def dag_started( ), run=self._build_run( run_id=self.build_dag_run_id( - dag_id=dag_id, - logical_date=logical_date, + dag_id=dag_id, logical_date=logical_date, clear_number=clear_number ), job_name=dag_id, nominal_start_time=nominal_start_time, @@ -374,6 +374,7 @@ def dag_success( run_id: str, end_date: datetime, logical_date: datetime, + clear_number: int, dag_run_state: DagRunState, task_ids: list[str], ): @@ -384,8 +385,7 @@ def dag_success( job=self._build_job(job_name=dag_id, job_type=_JOB_TYPE_DAG), run=Run( runId=self.build_dag_run_id( - dag_id=dag_id, - logical_date=logical_date, + dag_id=dag_id, logical_date=logical_date, clear_number=clear_number ), facets={ **get_airflow_state_run_facet(dag_id, run_id, task_ids, dag_run_state), @@ -409,6 +409,7 @@ def dag_failed( run_id: str, end_date: datetime, logical_date: datetime, + clear_number: int, dag_run_state: DagRunState, task_ids: list[str], msg: str, @@ -422,6 +423,7 @@ def dag_failed( runId=self.build_dag_run_id( dag_id=dag_id, logical_date=logical_date, + clear_number=clear_number, ), facets={ "errorMessage": error_message_run.ErrorMessageRunFacet( diff --git 
a/providers/src/airflow/providers/openlineage/plugins/listener.py b/providers/src/airflow/providers/openlineage/plugins/listener.py index 6a539ea27f4ce..7f0073d8f951b 100644 --- a/providers/src/airflow/providers/openlineage/plugins/listener.py +++ b/providers/src/airflow/providers/openlineage/plugins/listener.py @@ -143,6 +143,7 @@ def on_running(): parent_run_id = self.adapter.build_dag_run_id( dag_id=dag.dag_id, logical_date=dagrun.logical_date, + clear_number=dagrun.clear_number, ) if hasattr(task_instance, "logical_date"): @@ -228,6 +229,7 @@ def on_success(): parent_run_id = self.adapter.build_dag_run_id( dag_id=dag.dag_id, logical_date=dagrun.logical_date, + clear_number=dagrun.clear_number, ) if hasattr(task_instance, "logical_date"): @@ -332,6 +334,7 @@ def on_failure(): parent_run_id = self.adapter.build_dag_run_id( dag_id=dag.dag_id, logical_date=dagrun.logical_date, + clear_number=dagrun.clear_number, ) if hasattr(task_instance, "logical_date"): @@ -467,6 +470,7 @@ def on_dag_run_running(self, dag_run: DagRun, msg: str) -> None: nominal_start_time=data_interval_start, nominal_end_time=data_interval_end, run_facets=run_facets, + clear_number=dag_run.clear_number, owners=[x.strip() for x in dag_run.dag.owner.split(",")] if dag_run.dag else None, description=dag_run.dag.description if dag_run.dag else None, # AirflowJobFacet should be created outside ProcessPoolExecutor that pickles objects, @@ -502,6 +506,7 @@ def on_dag_run_success(self, dag_run: DagRun, msg: str) -> None: run_id=dag_run.run_id, end_date=dag_run.end_date, logical_date=dag_run.logical_date, + clear_number=dag_run.clear_number, task_ids=task_ids, dag_run_state=dag_run.get_state(), ) @@ -534,6 +539,7 @@ def on_dag_run_failed(self, dag_run: DagRun, msg: str) -> None: run_id=dag_run.run_id, end_date=dag_run.end_date, logical_date=dag_run.logical_date, + clear_number=dag_run.clear_number, dag_run_state=dag_run.get_state(), task_ids=task_ids, msg=msg, diff --git a/providers/tests/openlineage/plugins/test_adapter.py b/providers/tests/openlineage/plugins/test_adapter.py index a9787a9399a3d..a2266bb16bc0b 100644 --- a/providers/tests/openlineage/plugins/test_adapter.py +++ b/providers/tests/openlineage/plugins/test_adapter.py @@ -594,6 +594,7 @@ def test_emit_dag_started_event(mock_stats_incr, mock_stats_timer, generate_stat dag_id=dag_id, start_date=event_time, logical_date=event_time, + clear_number=0, nominal_start_time=event_time.isoformat(), nominal_end_time=event_time.isoformat(), owners=["airflow"], @@ -708,6 +709,7 @@ def test_emit_dag_complete_event( run_id=run_id, end_date=event_time, logical_date=event_time, + clear_number=0, dag_run_state=DagRunState.SUCCESS, task_ids=["task_0", "task_1", "task_2.test"], ) @@ -797,6 +799,7 @@ def test_emit_dag_failed_event( run_id=run_id, end_date=event_time, logical_date=event_time, + clear_number=0, dag_run_state=DagRunState.FAILED, task_ids=["task_0", "task_1", "task_2.test"], msg="error msg", @@ -864,6 +867,7 @@ def test_build_dag_run_id_is_valid_uuid(): result = OpenLineageAdapter.build_dag_run_id( dag_id=dag_id, logical_date=logical_date, + clear_number=0, ) uuid_result = uuid.UUID(result) assert uuid_result @@ -872,24 +876,30 @@ def test_build_dag_run_id_is_valid_uuid(): def test_build_dag_run_id_same_input_give_same_result(): result1 = OpenLineageAdapter.build_dag_run_id( - dag_id="dag1", - logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), + dag_id="dag1", logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), clear_number=0 ) result2 = 
OpenLineageAdapter.build_dag_run_id( - dag_id="dag1", - logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), + dag_id="dag1", logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), clear_number=0 ) assert result1 == result2 def test_build_dag_run_id_different_inputs_give_different_results(): result1 = OpenLineageAdapter.build_dag_run_id( - dag_id="dag1", - logical_date=datetime.datetime.now(), + dag_id="dag1", logical_date=datetime.datetime.now(), clear_number=0 ) result2 = OpenLineageAdapter.build_dag_run_id( - dag_id="dag2", - logical_date=datetime.datetime.now(), + dag_id="dag2", logical_date=datetime.datetime.now(), clear_number=0 + ) + assert result1 != result2 + + +def test_build_dag_run_id_different_clear_number_give_different_results(): + result1 = OpenLineageAdapter.build_dag_run_id( + dag_id="dag1", logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), clear_number=0 + ) + result2 = OpenLineageAdapter.build_dag_run_id( + dag_id="dag1", logical_date=datetime.datetime(2024, 1, 1, 1, 1, 1), clear_number=1 ) assert result1 != result2 diff --git a/providers/tests/openlineage/plugins/test_listener.py b/providers/tests/openlineage/plugins/test_listener.py index cbc4436ef0133..ccb330faa5d9e 100644 --- a/providers/tests/openlineage/plugins/test_listener.py +++ b/providers/tests/openlineage/plugins/test_listener.py @@ -188,8 +188,8 @@ def _create_listener_and_task_instance() -> tuple[OpenLineageListener, TaskInsta # Now you can use listener and task_instance in your tests to simulate their interaction. """ - def mock_dag_id(dag_id, logical_date): - return f"{logical_date.isoformat()}.{dag_id}" + def mock_dag_id(dag_id, logical_date, clear_number): + return f"{logical_date.isoformat()}.{dag_id}.{clear_number}" def mock_task_id(dag_id, task_id, try_number, logical_date, map_index): return f"{logical_date.isoformat()}.{dag_id}.{task_id}.{try_number}.{map_index}" @@ -214,6 +214,7 @@ def mock_task_id(dag_id, task_id, try_number, logical_date, map_index): task_instance.dag_run.run_id = "dag_run_run_id" task_instance.dag_run.data_interval_start = None task_instance.dag_run.data_interval_end = None + task_instance.dag_run.clear_number = 0 if AIRFLOW_V_3_0_PLUS: task_instance.dag_run.logical_date = dt.datetime(2020, 1, 1, 1, 1, 1) else: @@ -276,7 +277,7 @@ def test_adapter_start_task_is_called_with_proper_arguments( job_description="Test DAG Description", event_time="2023-01-01T13:01:01", parent_job_name="dag_id", - parent_run_id="2020-01-01T01:01:01.dag_id", + parent_run_id="2020-01-01T01:01:01.dag_id.0", code_location=None, nominal_start_time=None, nominal_end_time=None, @@ -330,7 +331,7 @@ def test_adapter_fail_task_is_called_with_proper_arguments( end_time="2023-01-03T13:01:01", job_name="job_name", parent_job_name="dag_id", - parent_run_id="2020-01-01T01:01:01.dag_id", + parent_run_id="2020-01-01T01:01:01.dag_id.0", run_id="2020-01-01T01:01:01.dag_id.task_id.1.-1", task=listener.extractor_manager.extract_metadata(), run_facets={ @@ -379,7 +380,7 @@ def test_adapter_complete_task_is_called_with_proper_arguments( end_time="2023-01-03T13:01:01", job_name="job_name", parent_job_name="dag_id", - parent_run_id="2020-01-01T01:01:01.dag_id", + parent_run_id="2020-01-01T01:01:01.dag_id.0", run_id=f"2020-01-01T01:01:01.dag_id.task_id.{EXPECTED_TRY_NUMBER_1}.-1", task=listener.extractor_manager.extract_metadata(), run_facets={ @@ -653,6 +654,7 @@ def set_result(*args, **kwargs): run_id="", end_date=event_time, logical_date=callback_future, + clear_number=0, dag_run_state=DagRunState.FAILED, 
task_ids=["task_id"],
            msg="",

From c09e9b5202ce4f1c6ba02381c0284cb4cd7a84e7 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk
Date: Sat, 7 Dec 2024 13:41:44 +0100
Subject: [PATCH 17/25] Fix flaky supervisor conflict test (#44760)

The test sometimes runs for a longer time and generates more requests,
thus producing slightly different output and a different count of
requests. This PR accepts a bigger request count.
---
 task_sdk/tests/execution_time/test_supervisor.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/task_sdk/tests/execution_time/test_supervisor.py b/task_sdk/tests/execution_time/test_supervisor.py
index 6bd3047de67e2..96506a48bda77 100644
--- a/task_sdk/tests/execution_time/test_supervisor.py
+++ b/task_sdk/tests/execution_time/test_supervisor.py
@@ -389,22 +389,23 @@ def handle_request(request: httpx.Request) -> httpx.Response:

         # Wait for the subprocess to finish -- it should have been terminated
         assert proc.wait() == -signal.SIGTERM

+        count_request = request_count["count"]
         # Verify the number of requests made
-        assert request_count["count"] == 2
+        assert count_request >= 2

         assert captured_logs == [
             {
-                "detail": {
-                    "current_state": "success",
-                    "message": "TI is no longer in the running state and task should terminate",
-                    "reason": "not_running",
-                },
                 "event": "Server indicated the task shouldn't be running anymore",
                 "level": "error",
                 "status_code": 409,
                 "logger": "supervisor",
                 "timestamp": mocker.ANY,
+                "detail": {
+                    "reason": "not_running",
+                    "message": "TI is no longer in the running state and task should terminate",
+                    "current_state": "success",
+                },
             }
-        ]
+        ] * (count_request - 1)

     @pytest.mark.parametrize("captured_logs", [logging.WARNING], indirect=True)
     def test_heartbeat_failures_handling(self, monkeypatch, mocker, captured_logs, time_machine):

From 558148c6ded5ee6e05dedced154130b7b8cf8140 Mon Sep 17 00:00:00 2001
From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com>
Date: Sat, 7 Dec 2024 15:47:30 +0100
Subject: [PATCH 18/25] Remove Provider Deprecations in Microsoft-PSRP (#44761)

---
 .../providers/microsoft/psrp/CHANGELOG.rst    |  9 +++++++++
 .../providers/microsoft/psrp/hooks/psrp.py    | 16 +---------------
 .../tests/microsoft/psrp/hooks/test_psrp.py   | 12 +-----------
 3 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/providers/src/airflow/providers/microsoft/psrp/CHANGELOG.rst b/providers/src/airflow/providers/microsoft/psrp/CHANGELOG.rst
index 794ba1d6cb6a3..0fcec517f0b50 100644
--- a/providers/src/airflow/providers/microsoft/psrp/CHANGELOG.rst
+++ b/providers/src/airflow/providers/microsoft/psrp/CHANGELOG.rst
@@ -27,6 +27,15 @@ Changelog
 ---------

+main
+....
+
+.. warning::
+  All deprecated classes, parameters and features have been removed from the Microsoft.PSRP provider package.
+  The following breaking changes were introduced:
+
+  * Passing kwargs to ``invoke_cmdlet`` was removed. Please use ``parameters`` instead.
+
 2.8.0
 .....
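For users migrating, the ``main`` entry above amounts to passing cmdlet parameters explicitly instead of as keyword arguments. A minimal sketch, assuming a hypothetical connection ID and cmdlet (neither comes from this patch); the ``invoke_cmdlet`` signature is taken from the hook diff that follows:

    from airflow.providers.microsoft.psrp.hooks.psrp import PsrpHook

    # "psrp_default" and the Get-ChildItem call are illustrative placeholders.
    with PsrpHook("psrp_default") as hook:
        # Before (removed): hook.invoke_cmdlet("Get-ChildItem", Path="C:\\Temp")
        # After: pass the cmdlet parameters explicitly via the `parameters` dict.
        hook.invoke_cmdlet("Get-ChildItem", parameters={"Path": "C:\\Temp"})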
diff --git a/providers/src/airflow/providers/microsoft/psrp/hooks/psrp.py b/providers/src/airflow/providers/microsoft/psrp/hooks/psrp.py index 9c855703f9dc0..cc5e2da178630 100644 --- a/providers/src/airflow/providers/microsoft/psrp/hooks/psrp.py +++ b/providers/src/airflow/providers/microsoft/psrp/hooks/psrp.py @@ -22,7 +22,6 @@ from copy import copy from logging import DEBUG, ERROR, INFO, WARNING from typing import TYPE_CHECKING, Any, Callable -from warnings import warn from weakref import WeakKeyDictionary from pypsrp.host import PSHost @@ -30,7 +29,7 @@ from pypsrp.powershell import PowerShell, PSInvocationState, RunspacePool from pypsrp.wsman import WSMan -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook INFORMATIONAL_RECORD_LEVEL_MAP = { @@ -229,21 +228,8 @@ def invoke_cmdlet( use_local_scope: bool | None = None, arguments: list[str] | None = None, parameters: dict[str, str] | None = None, - **kwargs: str, ) -> PowerShell: """Invoke a PowerShell cmdlet and return session.""" - if kwargs: - if parameters: - raise ValueError("**kwargs not allowed when 'parameters' is used at the same time.") - warn( - "Passing **kwargs to 'invoke_cmdlet' is deprecated " - "and will be removed in a future release. Please use 'parameters' " - "instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - parameters = kwargs - with self.invoke() as ps: ps.add_cmdlet(name, use_local_scope=use_local_scope) for argument in arguments or (): diff --git a/providers/tests/microsoft/psrp/hooks/test_psrp.py b/providers/tests/microsoft/psrp/hooks/test_psrp.py index 1f56e67a7a689..71251a216c956 100644 --- a/providers/tests/microsoft/psrp/hooks/test_psrp.py +++ b/providers/tests/microsoft/psrp/hooks/test_psrp.py @@ -25,7 +25,7 @@ from pypsrp.messages import MessageType from pypsrp.powershell import PSInvocationState -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.models import Connection from airflow.providers.microsoft.psrp.hooks.psrp import PsrpHook @@ -195,16 +195,6 @@ def test_invoke_cmdlet(self, *mocks): assert [call({"bar": "1", "baz": "2"})] == ps.add_parameters.mock_calls assert [call(arg) for arg in arguments] == ps.add_argument.mock_calls - def test_invoke_cmdlet_deprecated_kwargs(self, *mocks): - with PsrpHook(CONNECTION_ID) as hook: - with pytest.warns( - AirflowProviderDeprecationWarning, - match=r"Passing \*\*kwargs to 'invoke_cmdlet' is deprecated and will be removed in a future release. 
Please use 'parameters' instead.",
-            ):
-                ps = hook.invoke_cmdlet("foo", bar="1", baz="2")
-            assert [call("foo", use_local_scope=None)] == ps.add_cmdlet.mock_calls
-            assert [call({"bar": "1", "baz": "2"})] == ps.add_parameters.mock_calls
-
     def test_invoke_powershell(self, *mocks):
         with PsrpHook(CONNECTION_ID) as hook:
             ps = hook.invoke_powershell("foo")

From e786c78f528ab3f9f8073ad1b4dc7323b8e5a0ce Mon Sep 17 00:00:00 2001
From: Kalyan R
Date: Sat, 7 Dec 2024 20:21:47 +0530
Subject: [PATCH 19/25] Remove Provider Deprecations in Yandex provider (#44754)

---
 .../airflow/providers/yandex/CHANGELOG.rst    | 13 ++++++
 .../airflow/providers/yandex/hooks/yandex.py  | 24 +----------
 .../yandex/hooks/yandexcloud_dataproc.py      | 30 -------------
 .../providers/yandex/operators/dataproc.py    | 12 ------
 .../yandex/operators/yandexcloud_dataproc.py  | 30 -------------
 providers/tests/yandex/hooks/test_yandex.py   | 42 -------------------
 6 files changed, 15 insertions(+), 136 deletions(-)
 delete mode 100644 providers/src/airflow/providers/yandex/hooks/yandexcloud_dataproc.py
 delete mode 100644 providers/src/airflow/providers/yandex/operators/yandexcloud_dataproc.py

diff --git a/providers/src/airflow/providers/yandex/CHANGELOG.rst b/providers/src/airflow/providers/yandex/CHANGELOG.rst
index a34d53265e92f..2f3b6822448e9 100644
--- a/providers/src/airflow/providers/yandex/CHANGELOG.rst
+++ b/providers/src/airflow/providers/yandex/CHANGELOG.rst
@@ -27,6 +27,19 @@ Changelog
 ---------

+main
+....
+
+.. warning::
+  All deprecated classes, parameters and features have been removed from the Yandex provider package.
+  The following breaking changes were introduced:
+
+  * removed ``YandexCloudBaseHook.provider_user_agent``. Use ``utils.user_agent.provider_user_agent`` instead.
+  * removed the ``connection_id`` parameter from ``YandexCloudBaseHook``. Use the ``yandex_conn_id`` parameter instead.
+  * removed the ``yandex.hooks.yandexcloud_dataproc`` module.
+  * removed the ``yandex.operators.yandexcloud_dataproc`` module.
+  * removed implicit passing of ``yandex_conn_id`` in ``DataprocBaseOperator``. Please pass it as a parameter.
+
 3.12.0
 ......

diff --git a/providers/src/airflow/providers/yandex/hooks/yandex.py b/providers/src/airflow/providers/yandex/hooks/yandex.py
index f132b708229c2..a8e796c08c143 100644
--- a/providers/src/airflow/providers/yandex/hooks/yandex.py
+++ b/providers/src/airflow/providers/yandex/hooks/yandex.py
@@ -16,12 +16,10 @@
 # under the License.
 from __future__ import annotations

-import warnings
 from typing import Any

 import yandexcloud

-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.hooks.base import BaseHook
 from airflow.providers.yandex.utils.credentials import (
     CredentialsType,
@@ -38,7 +36,6 @@ class YandexCloudBaseHook(BaseHook):
     A base hook for Yandex.Cloud related tasks.

     :param yandex_conn_id: The connection ID to use when fetching connection info
-    :param connection_id: Deprecated, use yandex_conn_id instead
     :param default_folder_id: The folder ID to use instead of connection folder ID
     :param default_public_ssh_key: The key to use instead of connection key
     :param default_service_account_id: The service account ID to use instead of key service account ID
@@ -96,16 +93,6 @@ def get_connection_form_widgets(cls) -> dict[str, Any]:
             ),
         }

-    @classmethod
-    def provider_user_agent(cls) -> str | None:
-        warnings.warn(
-            "Using `provider_user_agent` in `YandexCloudBaseHook` is deprecated. 
" - "Please use it in `utils.user_agent` instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - return provider_user_agent() - @classmethod def get_ui_field_behaviour(cls) -> dict[str, Any]: """Return custom UI field behaviour for Yandex connection.""" @@ -116,21 +103,14 @@ def get_ui_field_behaviour(cls) -> dict[str, Any]: def __init__( self, - # connection_id is deprecated, use yandex_conn_id instead - connection_id: str | None = None, yandex_conn_id: str | None = None, default_folder_id: str | None = None, default_public_ssh_key: str | None = None, default_service_account_id: str | None = None, ) -> None: super().__init__() - if connection_id: - warnings.warn( - "Using `connection_id` is deprecated. Please use `yandex_conn_id` parameter.", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - self.connection_id = yandex_conn_id or connection_id or default_conn_name + + self.connection_id = yandex_conn_id or default_conn_name self.connection = self.get_connection(self.connection_id) self.extras = self.connection.extra_dejson self.credentials: CredentialsType = get_credentials( diff --git a/providers/src/airflow/providers/yandex/hooks/yandexcloud_dataproc.py b/providers/src/airflow/providers/yandex/hooks/yandexcloud_dataproc.py deleted file mode 100644 index 6256769c9230f..0000000000000 --- a/providers/src/airflow/providers/yandex/hooks/yandexcloud_dataproc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""This module is deprecated. Please use :mod:`airflow.providers.yandex.hooks.dataproc` instead.""" - -from __future__ import annotations - -import warnings - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.yandex.hooks.dataproc import * # noqa: F403 - -warnings.warn( - "This module is deprecated. Please use `airflow.providers.yandex.hooks.dataproc` instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, -) diff --git a/providers/src/airflow/providers/yandex/operators/dataproc.py b/providers/src/airflow/providers/yandex/operators/dataproc.py index eb5ce140f55e3..20f992496da0a 100644 --- a/providers/src/airflow/providers/yandex/operators/dataproc.py +++ b/providers/src/airflow/providers/yandex/operators/dataproc.py @@ -16,12 +16,10 @@ # under the License. 
from __future__ import annotations -import warnings from collections.abc import Iterable, Sequence from dataclasses import dataclass from typing import TYPE_CHECKING -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import BaseOperator from airflow.providers.yandex.hooks.dataproc import DataprocHook @@ -269,16 +267,6 @@ def __init__(self, *, yandex_conn_id: str | None = None, cluster_id: str | None def _setup(self, context: Context) -> DataprocHook: if self.cluster_id is None: self.cluster_id = context["task_instance"].xcom_pull(key="cluster_id") - if self.yandex_conn_id is None: - xcom_yandex_conn_id = context["task_instance"].xcom_pull(key="yandexcloud_connection_id") - if xcom_yandex_conn_id: - warnings.warn( - "Implicit pass of `yandex_conn_id` is deprecated, please pass it explicitly", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - self.yandex_conn_id = xcom_yandex_conn_id - return DataprocHook(yandex_conn_id=self.yandex_conn_id) def execute(self, context: Context): diff --git a/providers/src/airflow/providers/yandex/operators/yandexcloud_dataproc.py b/providers/src/airflow/providers/yandex/operators/yandexcloud_dataproc.py deleted file mode 100644 index 1f7db5d512c77..0000000000000 --- a/providers/src/airflow/providers/yandex/operators/yandexcloud_dataproc.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""This module is deprecated. Please use :mod:`airflow.providers.yandex.operators.dataproc` instead.""" - -from __future__ import annotations - -import warnings - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.yandex.operators.dataproc import * # noqa: F403 - -warnings.warn( - "This module is deprecated. Please use `airflow.providers.yandex.operators.dataproc` instead.", - AirflowProviderDeprecationWarning, - stacklevel=2, -) diff --git a/providers/tests/yandex/hooks/test_yandex.py b/providers/tests/yandex/hooks/test_yandex.py index 7066c3e7553b1..92188cd07f319 100644 --- a/providers/tests/yandex/hooks/test_yandex.py +++ b/providers/tests/yandex/hooks/test_yandex.py @@ -17,15 +17,12 @@ # under the License. 
from __future__ import annotations -import os from unittest import mock -from unittest.mock import MagicMock import pytest yandexcloud = pytest.importorskip("yandexcloud") -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.providers.yandex.hooks.yandex import YandexCloudBaseHook from tests_common.test_utils.config import conf_vars @@ -55,23 +52,6 @@ def test_client_created_without_exceptions(self, mock_get_credentials, mock_get_ ) assert hook.client is not None - @mock.patch("airflow.hooks.base.BaseHook.get_connection") - @mock.patch("airflow.providers.yandex.utils.credentials.get_credentials") - def test_provider_user_agent(self, mock_get_credentials, mock_get_connection): - mock_get_connection.return_value = mock.Mock(yandex_conn_id="yandexcloud_default", extra_dejson="{}") - mock_get_credentials.return_value = {"token": 122323} - sdk_prefix = "MyAirflow" - - hook = YandexCloudBaseHook() - with ( - conf_vars({("yandex", "sdk_user_agent_prefix"): sdk_prefix}), - pytest.warns( - AirflowProviderDeprecationWarning, - match="Using `provider_user_agent` in `YandexCloudBaseHook` is deprecated. Please use it in `utils.user_agent` instead.", - ), - ): - assert hook.provider_user_agent().startswith(sdk_prefix) - @mock.patch("airflow.hooks.base.BaseHook.get_connection") @mock.patch("airflow.providers.yandex.utils.credentials.get_credentials") def test_sdk_user_agent(self, mock_get_credentials, mock_get_connection): @@ -83,28 +63,6 @@ def test_sdk_user_agent(self, mock_get_credentials, mock_get_connection): hook = YandexCloudBaseHook() assert hook.sdk._channels._client_user_agent.startswith(sdk_prefix) - @pytest.mark.parametrize( - "uri", - [ - pytest.param( - "a://?extra__yandexcloud__folder_id=abc&extra__yandexcloud__public_ssh_key=abc", id="prefix" - ), - pytest.param("a://?folder_id=abc&public_ssh_key=abc", id="no-prefix"), - ], - ) - @mock.patch("airflow.providers.yandex.utils.credentials.get_credentials", new=MagicMock()) - def test_backcompat_prefix_works(self, uri): - with ( - mock.patch.dict(os.environ, {"AIRFLOW_CONN_MY_CONN": uri}), - pytest.warns( - AirflowProviderDeprecationWarning, - match="Using `connection_id` is deprecated. 
Please use `yandex_conn_id` parameter.",
-            ),
-        ):
-            hook = YandexCloudBaseHook("my_conn")
-            assert hook.default_folder_id == "abc"
-            assert hook.default_public_ssh_key == "abc"
-
     @mock.patch("airflow.hooks.base.BaseHook.get_connection")
     @mock.patch("airflow.providers.yandex.utils.credentials.get_credentials")
     def test_get_endpoint_specified(self, mock_get_credentials, mock_get_connection):

From 75b24e88b25a762b503e783154361ff978024340 Mon Sep 17 00:00:00 2001
From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com>
Date: Sat, 7 Dec 2024 16:01:40 +0100
Subject: [PATCH 20/25] Remove Provider Deprecations in Microsoft-MSSQL (#44762)

---
 .../operators.rst                             |  4 +-
 .../providers/microsoft/mssql/CHANGELOG.rst   | 13 ++++++
 .../microsoft/mssql/operators/__init__.py     | 17 -----
 .../microsoft/mssql/operators/mssql.py        | 73 -------------------
 .../providers/microsoft/mssql/provider.yaml   |  5 --
 .../microsoft/mssql/operators/__init__.py     | 16 ----
 .../microsoft/mssql/operators/test_mssql.py   | 65 -----------------
 7 files changed, 15 insertions(+), 178 deletions(-)
 delete mode 100644 providers/src/airflow/providers/microsoft/mssql/operators/__init__.py
 delete mode 100644 providers/src/airflow/providers/microsoft/mssql/operators/mssql.py
 delete mode 100644 providers/tests/microsoft/mssql/operators/__init__.py
 delete mode 100644 providers/tests/microsoft/mssql/operators/test_mssql.py

diff --git a/docs/apache-airflow-providers-microsoft-mssql/operators.rst b/docs/apache-airflow-providers-microsoft-mssql/operators.rst
index 560e196731b57..75955a2dc9a83 100644
--- a/docs/apache-airflow-providers-microsoft-mssql/operators.rst
+++ b/docs/apache-airflow-providers-microsoft-mssql/operators.rst
@@ -25,8 +25,8 @@ The purpose of this guide is to define tasks involving interactions with the MSS

 Use the :class:`SQLExecuteQueryOperator <airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator>` to execute SQL commands in the MSSQL database.

-.. warning::
-    Previously, MsSqlOperator was used to perform this kind of operation. But at the moment MsSqlOperator is deprecated and will be removed in future versions of the provider. Please consider to switch to SQLExecuteQueryOperator as soon as possible.
+.. note::
+    Previously, ``MsSqlOperator`` was used to perform this kind of operation. Please use ``SQLExecuteQueryOperator`` instead.

 Common Database Operations with SQLExecuteQueryOperator
 -------------------------------------------------------
diff --git a/providers/src/airflow/providers/microsoft/mssql/CHANGELOG.rst b/providers/src/airflow/providers/microsoft/mssql/CHANGELOG.rst
index 92c92943ac251..78fa7fd456fea 100644
--- a/providers/src/airflow/providers/microsoft/mssql/CHANGELOG.rst
+++ b/providers/src/airflow/providers/microsoft/mssql/CHANGELOG.rst
@@ -27,6 +27,19 @@ Changelog
 ---------

+main
+....
+
+Breaking changes
+~~~~~~~~~~~~~~~~
+
+.. warning::
+  All deprecated classes, parameters and features have been removed from the MSSQL provider package.
+  The following breaking changes were introduced:
+
+  * Operators
+    * Remove ``airflow.providers.microsoft.mssql.operators.mssql.MsSqlOperator``. Please use ``airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator``.
+
 3.9.2
 .....
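A minimal migration sketch for the changelog entry above (task ID, connection ID and query are illustrative). The removed operator, shown in the deleted file below, mapped ``mssql_conn_id`` to ``conn_id`` and folded ``database`` into ``hook_params={"schema": ...}``, so the equivalent call is:

    from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator

    # Before (removed):
    #   MsSqlOperator(task_id="run_query", mssql_conn_id="mssql_default",
    #                 database="mydb", sql="SELECT 1")
    # After, per the argument mapping the removed operator itself performed:
    run_query = SQLExecuteQueryOperator(
        task_id="run_query",
        conn_id="mssql_default",
        hook_params={"schema": "mydb"},  # replaces the old `database` argument
        sql="SELECT 1",
    )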
diff --git a/providers/src/airflow/providers/microsoft/mssql/operators/__init__.py b/providers/src/airflow/providers/microsoft/mssql/operators/__init__.py deleted file mode 100644 index 217e5db960782..0000000000000 --- a/providers/src/airflow/providers/microsoft/mssql/operators/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/src/airflow/providers/microsoft/mssql/operators/mssql.py b/providers/src/airflow/providers/microsoft/mssql/operators/mssql.py deleted file mode 100644 index 3e698e6589b03..0000000000000 --- a/providers/src/airflow/providers/microsoft/mssql/operators/mssql.py +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from collections.abc import Sequence -from typing import ClassVar - -from deprecated import deprecated - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator - - -@deprecated( - reason=( - "Please use `airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`." - "Also, you can provide `hook_params={'schema': }`." - ), - category=AirflowProviderDeprecationWarning, -) -class MsSqlOperator(SQLExecuteQueryOperator): - """ - Executes sql code in a specific Microsoft SQL database. - - .. seealso:: - For more information on how to use this operator, take a look at the guide: - :ref:`howto/operator:MsSqlOperator` - - This operator may use one of two hooks, depending on the ``conn_type`` of the connection. - - If conn_type is ``'odbc'``, then :py:class:`~airflow.providers.odbc.hooks.odbc.OdbcHook` - is used. Otherwise, :py:class:`~airflow.providers.microsoft.mssql.hooks.mssql.MsSqlHook` is used. - - This class is deprecated. - - Please use :class:`airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator`. 
- - :param sql: the sql code to be executed (templated) - :param mssql_conn_id: reference to a specific mssql database - :param parameters: (optional) the parameters to render the SQL query with. - :param autocommit: if True, each command is automatically committed. - (default value: False) - :param database: name of database which overwrite defined one in connection - """ - - template_fields: Sequence[str] = ("sql",) - template_ext: Sequence[str] = (".sql",) - template_fields_renderers: ClassVar[dict] = {"sql": "tsql"} - ui_color = "#ededed" - - def __init__( - self, *, mssql_conn_id: str = "mssql_default", database: str | None = None, **kwargs - ) -> None: - if database is not None: - hook_params = kwargs.pop("hook_params", {}) - kwargs["hook_params"] = {"schema": database, **hook_params} - - super().__init__(conn_id=mssql_conn_id, **kwargs) diff --git a/providers/src/airflow/providers/microsoft/mssql/provider.yaml b/providers/src/airflow/providers/microsoft/mssql/provider.yaml index f186423668de0..de96f1500f745 100644 --- a/providers/src/airflow/providers/microsoft/mssql/provider.yaml +++ b/providers/src/airflow/providers/microsoft/mssql/provider.yaml @@ -71,11 +71,6 @@ integrations: - /docs/apache-airflow-providers-microsoft-mssql/operators.rst tags: [software] -operators: - - integration-name: Microsoft SQL Server (MSSQL) - python-modules: - - airflow.providers.microsoft.mssql.operators.mssql - hooks: - integration-name: Microsoft SQL Server (MSSQL) python-modules: diff --git a/providers/tests/microsoft/mssql/operators/__init__.py b/providers/tests/microsoft/mssql/operators/__init__.py deleted file mode 100644 index 13a83393a9124..0000000000000 --- a/providers/tests/microsoft/mssql/operators/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/providers/tests/microsoft/mssql/operators/test_mssql.py b/providers/tests/microsoft/mssql/operators/test_mssql.py deleted file mode 100644 index 8f2311ec769cc..0000000000000 --- a/providers/tests/microsoft/mssql/operators/test_mssql.py +++ /dev/null @@ -1,65 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from unittest import mock -from unittest.mock import MagicMock, Mock - -import pytest - -from airflow.exceptions import AirflowException -from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator - -try: - from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook -except ImportError: - pytest.skip("MSSQL not available", allow_module_level=True) - -MSSQL_DEFAULT = "mssql_default" - - -class TestMsSqlOperator: - @mock.patch("airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator.get_db_hook") - def test_get_hook_from_conn(self, mock_get_db_hook): - """ - :class:`~.MsSqlOperator` should use the hook returned by :meth:`airflow.models.Connection.get_hook` - if one is returned. - - This behavior is necessary in order to support usage of :class:`~.OdbcHook` with this operator. - - Specifically we verify here that :meth:`~.MsSqlOperator.get_hook` returns the hook returned from a - call of ``get_hook`` on the object returned from :meth:`~.BaseHook.get_connection`. - """ - mock_hook = MagicMock() - mock_get_db_hook.return_value = mock_hook - - op = SQLExecuteQueryOperator(task_id="test", sql="", conn_id=MSSQL_DEFAULT) - assert op.get_db_hook() == mock_hook - - @mock.patch( - "airflow.providers.common.sql.operators.sql.SQLExecuteQueryOperator.get_db_hook", autospec=MsSqlHook - ) - def test_get_hook_default(self, mock_get_db_hook): - """ - If :meth:`airflow.models.Connection.get_hook` does not return a hook (e.g. because of an invalid - conn type), then :class:`~.MsSqlHook` should be used. - """ - mock_get_db_hook.return_value.side_effect = Mock(side_effect=AirflowException()) - - op = SQLExecuteQueryOperator(task_id="test", sql="", conn_id=MSSQL_DEFAULT) - assert op.get_db_hook().__class__.__name__ == "MsSqlHook" From 69251624a6bcf3b7c16c254435009e28a35c2b34 Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sat, 7 Dec 2024 21:41:13 +0530 Subject: [PATCH 21/25] Deprecated DruidCheckOperator (#44765) --- .../providers/apache/druid/CHANGELOG.rst | 11 ++++++ .../apache/druid/operators/druid_check.py | 38 ------------------- .../providers/apache/druid/provider.yaml | 1 - 3 files changed, 11 insertions(+), 39 deletions(-) delete mode 100644 providers/src/airflow/providers/apache/druid/operators/druid_check.py diff --git a/providers/src/airflow/providers/apache/druid/CHANGELOG.rst b/providers/src/airflow/providers/apache/druid/CHANGELOG.rst index 0a8cc2a947701..6e0d4aeed6efc 100644 --- a/providers/src/airflow/providers/apache/druid/CHANGELOG.rst +++ b/providers/src/airflow/providers/apache/druid/CHANGELOG.rst @@ -27,6 +27,17 @@ Changelog --------- +main +..... + +.. warning:: + All deprecated classes, parameters and features have been removed from the Apache Druid provider package. + The following breaking changes were introduced: + + * Operators + + * Removed ``DruidCheckOperator``. Please use ``airflow.providers.common.sql.operators.sql.SQLCheckOperator`` instead. + 3.12.1 ...... diff --git a/providers/src/airflow/providers/apache/druid/operators/druid_check.py b/providers/src/airflow/providers/apache/druid/operators/druid_check.py deleted file mode 100644 index 5fe6844a66d19..0000000000000 --- a/providers/src/airflow/providers/apache/druid/operators/druid_check.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -from __future__ import annotations - -from deprecated import deprecated - -from airflow.exceptions import AirflowProviderDeprecationWarning -from airflow.providers.common.sql.operators.sql import SQLCheckOperator - - -@deprecated( - reason="Please use `airflow.providers.common.sql.operators.sql.SQLCheckOperator`.", - category=AirflowProviderDeprecationWarning, -) -class DruidCheckOperator(SQLCheckOperator): - """ - This class is deprecated. - - Please use :class:`airflow.providers.common.sql.operators.sql.SQLCheckOperator`. - """ - - def __init__(self, druid_broker_conn_id: str = "druid_broker_default", **kwargs): - super().__init__(conn_id=druid_broker_conn_id, **kwargs) diff --git a/providers/src/airflow/providers/apache/druid/provider.yaml b/providers/src/airflow/providers/apache/druid/provider.yaml index 3ceb838de1cee..ee068c811cfa9 100644 --- a/providers/src/airflow/providers/apache/druid/provider.yaml +++ b/providers/src/airflow/providers/apache/druid/provider.yaml @@ -76,7 +76,6 @@ operators: - integration-name: Apache Druid python-modules: - airflow.providers.apache.druid.operators.druid - - airflow.providers.apache.druid.operators.druid_check hooks: - integration-name: Apache Druid From 5d77bda57e2cc5666db8663ed0dcd9e8794a5f26 Mon Sep 17 00:00:00 2001 From: Shahar Epstein <60007259+shahar1@users.noreply.github.com> Date: Sat, 7 Dec 2024 18:35:19 +0200 Subject: [PATCH 22/25] Prevent using `trigger_rule=TriggerRule.ALWAYS` in a task-generated mapping within bare tasks (#44751) --- airflow/decorators/base.py | 21 ++++++++++ .../dynamic-task-mapping.rst | 10 +++-- newsfragments/44751.bugfix.rst | 1 + .../src/airflow/sdk/definitions/taskgroup.py | 4 +- tests/decorators/test_mapped.py | 38 +++++++++++++++++++ tests/decorators/test_task_group.py | 5 ++- 6 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 newsfragments/44751.bugfix.rst diff --git a/airflow/decorators/base.py b/airflow/decorators/base.py index 9a9498d49c307..9b303e1a703e7 100644 --- a/airflow/decorators/base.py +++ b/airflow/decorators/base.py @@ -398,6 +398,12 @@ def _validate_arg_names(self, func: ValidationSource, kwargs: dict[str, Any]): super()._validate_arg_names(func, kwargs) def expand(self, **map_kwargs: OperatorExpandArgument) -> XComArg: + if self.kwargs.get("trigger_rule") == TriggerRule.ALWAYS and any( + [isinstance(expanded, XComArg) for expanded in map_kwargs.values()] + ): + raise ValueError( + "Task-generated mapping within a task using 'expand' is not allowed with trigger rule 'always'." 
+        )
         if not map_kwargs:
             raise TypeError("no arguments to expand against")
         self._validate_arg_names("expand", map_kwargs)
@@ -411,6 +417,21 @@ def expand(self, **map_kwargs: OperatorExpandArgument) -> XComArg:
         return self._expand(DictOfListsExpandInput(map_kwargs), strict=False)

     def expand_kwargs(self, kwargs: OperatorExpandKwargsArgument, *, strict: bool = True) -> XComArg:
+        if (
+            self.kwargs.get("trigger_rule") == TriggerRule.ALWAYS
+            and not isinstance(kwargs, XComArg)
+            and any(
+                [
+                    isinstance(v, XComArg)
+                    for kwarg in kwargs
+                    if not isinstance(kwarg, XComArg)
+                    for v in kwarg.values()
+                ]
+            )
+        ):
+            raise ValueError(
+                "Task-generated mapping within a task using 'expand_kwargs' is not allowed with trigger rule 'always'."
+            )
         if isinstance(kwargs, Sequence):
             for item in kwargs:
                 if not isinstance(item, (XComArg, Mapping)):
diff --git a/docs/apache-airflow/authoring-and-scheduling/dynamic-task-mapping.rst b/docs/apache-airflow/authoring-and-scheduling/dynamic-task-mapping.rst
index 426a720781ec8..00fb5b473e78d 100644
--- a/docs/apache-airflow/authoring-and-scheduling/dynamic-task-mapping.rst
+++ b/docs/apache-airflow/authoring-and-scheduling/dynamic-task-mapping.rst
@@ -84,10 +84,6 @@ The grid view also provides visibility into your mapped tasks in the details pan

 Although we show a "reduce" task here (``sum_it``) you don't have to have one, the mapped tasks will still be executed even if they have no downstream tasks.

-.. warning:: ``TriggerRule.ALWAYS`` cannot be utilized in expanded tasks
-
-    Assigning ``trigger_rule=TriggerRule.ALWAYS`` in expanded tasks is forbidden, as expanded parameters will be undefined with the task's immediate execution.
-    This is enforced at the time of the DAG parsing, and will raise an error if you try to use it.

 Task-generated Mapping
 ----------------------
@@ -113,6 +109,12 @@ The above examples we've shown could all be achieved with a ``for`` loop in the

 The ``make_list`` task runs as a normal task and must return a list or dict (see `What data types can be expanded?`_), and then the ``consumer`` task will be called four times, once with each value in the return of ``make_list``.

+.. warning:: Task-generated mapping cannot be utilized with ``TriggerRule.ALWAYS``
+
+    Assigning ``trigger_rule=TriggerRule.ALWAYS`` in a task-generated mapping is not allowed, as the expanded parameters are still undefined when the task executes immediately.
+    This is enforced at DAG parsing time, for both tasks and mapped task groups, and will raise an error if you try to use it.
+    In the example above, setting ``trigger_rule=TriggerRule.ALWAYS`` in the ``consumer`` task will raise an error since ``make_list`` is a task-generated mapping.
+
 Repeated mapping
 ----------------
diff --git a/newsfragments/44751.bugfix.rst b/newsfragments/44751.bugfix.rst
new file mode 100644
index 0000000000000..1ca32178be1c5
--- /dev/null
+++ b/newsfragments/44751.bugfix.rst
@@ -0,0 +1 @@
+``TriggerRule.ALWAYS`` cannot be utilized within a task-generated mapping, either in bare tasks (fixed in this PR) or mapped task groups (fixed in PR #44368). The issue with doing so is that the task is executed immediately, without waiting for the upstream's mapping results, which leads to the task failing. This fix avoids that by raising an exception when the pattern is detected during DAG parsing.
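To make the rejected pattern concrete, here is a minimal sketch mirroring the tests added in the diff below (DAG and task names are illustrative):

    from airflow.decorators import dag, task

    @dag(schedule=None)
    def example():
        @task
        def get_input():
            return ["world", "moon"]

        # trigger_rule="always" would fire `hello` immediately, before
        # `get_input` has produced the values to map over.
        @task(trigger_rule="always")
        def hello(value):
            print(f"Hello, {value}")

        # Raises ValueError at DAG parse time: the expand() argument is an
        # XComArg (task-generated mapping) combined with trigger rule 'always'.
        hello.expand(value=get_input())

    example()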
diff --git a/task_sdk/src/airflow/sdk/definitions/taskgroup.py b/task_sdk/src/airflow/sdk/definitions/taskgroup.py
index fd02a4c94e714..52b30ba31f8af 100644
--- a/task_sdk/src/airflow/sdk/definitions/taskgroup.py
+++ b/task_sdk/src/airflow/sdk/definitions/taskgroup.py
@@ -597,7 +597,9 @@ def __iter__(self):

         for child in self.children.values():
             if isinstance(child, AbstractOperator) and child.trigger_rule == TriggerRule.ALWAYS:
-                raise ValueError("Tasks in a mapped task group cannot have trigger_rule set to 'ALWAYS'")
+                raise ValueError(
+                    "Task-generated mapping within a mapped task group is not allowed with trigger rule 'always'"
+                )
             yield from self._iter_child(child)

     def iter_mapped_dependencies(self) -> Iterator[DAGNode]:
diff --git a/tests/decorators/test_mapped.py b/tests/decorators/test_mapped.py
index 9bd59f03be7e1..4b3faf119fd81 100644
--- a/tests/decorators/test_mapped.py
+++ b/tests/decorators/test_mapped.py
@@ -61,3 +61,41 @@ def f(x: int, y: int) -> int:
             xcoms.add(ti.xcom_pull(session=session, task_ids=ti.task_id, map_indexes=ti.map_index))

     assert xcoms == {11, 12, 13}
+
+
+@pytest.mark.db_test
+def test_fail_task_generated_mapping_with_trigger_rule_always__expand(dag_maker, session):
+    with DAG(dag_id="d", schedule=None, start_date=DEFAULT_DATE):
+
+        @task
+        def get_input():
+            return ["world", "moon"]
+
+        @task(trigger_rule="always")
+        def hello(input):
+            print(f"Hello, {input}")
+
+        with pytest.raises(
+            ValueError,
+            match="Task-generated mapping within a task using 'expand' is not allowed with trigger rule 'always'",
+        ):
+            hello.expand(input=get_input())
+
+
+@pytest.mark.db_test
+def test_fail_task_generated_mapping_with_trigger_rule_always__expand_kwargs(dag_maker, session):
+    with DAG(dag_id="d", schedule=None, start_date=DEFAULT_DATE):
+
+        @task
+        def get_input():
+            return ["world", "moon"]
+
+        @task(trigger_rule="always")
+        def hello(input, input2):
+            print(f"Hello, {input}, {input2}")
+
+        with pytest.raises(
+            ValueError,
+            match="Task-generated mapping within a task using 'expand_kwargs' is not allowed with trigger rule 'always'",
+        ):
+            hello.expand_kwargs([{"input": get_input(), "input2": get_input()}])
diff --git a/tests/decorators/test_task_group.py b/tests/decorators/test_task_group.py
index 2dab23ca38fc7..ce1b518a8ff59 100644
--- a/tests/decorators/test_task_group.py
+++ b/tests/decorators/test_task_group.py
@@ -135,7 +135,7 @@ def tg():

 @pytest.mark.db_test
-def test_expand_fail_trigger_rule_always(dag_maker, session):
+def test_fail_task_generated_mapping_with_trigger_rule_always(dag_maker, session):
     @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1))
     def pipeline():
         @task
@@ -151,7 +151,8 @@ def tg(param):
                 t1(param)

     with pytest.raises(
-        ValueError, match="Tasks in a mapped task group cannot have trigger_rule set to 'ALWAYS'"
+        ValueError,
+        match="Task-generated mapping within a mapped task group is not allowed with trigger rule 'always'",
     ):
         tg.expand(param=get_param())

From 16206ef559efd72c8be56fcbb0f1b1f0d40159c1 Mon Sep 17 00:00:00 2001
From: Ash Berlin-Taylor
Date: Sat, 7 Dec 2024 16:41:10 +0000
Subject: [PATCH 23/25] Correctly ensure that we give subprocesses time to
 exit after signalling it (#44766)

We had a bug hidden in our tests by our use of mocks -- if the
subprocess returned any output, then `self.selector.select()` would
return straight away, not waiting for the maximum timeout, which would
result in the "escalation" signal being sent after one output, not
after the given interval.
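In outline, the fix replaces a single bounded wait with a deadline loop that re-services the sockets using only the remaining time. A simplified, standalone sketch of that pattern (names condensed; not the exact supervisor code, which follows in the diff below):

    import time
    from typing import Callable, Optional

    def wait_for_exit(
        service_once: Callable[[float], Optional[int]], escalation_delay: float
    ) -> Optional[int]:
        # service_once() blocks on select() and returns the exit code, or
        # None while the child is still alive. select() may wake early on
        # any output, so pass only the *remaining* time each iteration
        # instead of assuming one full timeout has elapsed.
        start = time.monotonic()
        end = start + escalation_delay
        now = start
        while now < end:
            if (exit_code := service_once(end - now)) is not None:
                return exit_code
            now = time.monotonic()
        return None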
--- .../airflow/sdk/execution_time/supervisor.py | 28 ++- .../tests/execution_time/test_supervisor.py | 206 ++++++++---------- 2 files changed, 110 insertions(+), 124 deletions(-) diff --git a/task_sdk/src/airflow/sdk/execution_time/supervisor.py b/task_sdk/src/airflow/sdk/execution_time/supervisor.py index 3bc714e641595..1b2a7ba75773c 100644 --- a/task_sdk/src/airflow/sdk/execution_time/supervisor.py +++ b/task_sdk/src/airflow/sdk/execution_time/supervisor.py @@ -445,12 +445,21 @@ def kill( try: self._process.send_signal(sig) - # Service subprocess events during the escalation delay - self._service_subprocess(max_wait_time=escalation_delay, raise_on_timeout=True) - if self._exit_code is not None: - log.info("Process exited", pid=self.pid, exit_code=self._exit_code, signal=sig.name) - return - except psutil.TimeoutExpired: + start = time.monotonic() + end = start + escalation_delay + now = start + + while now < end: + # Service subprocess events during the escalation delay. This will return as soon as it's + # read from any of the sockets, so we need to re-run it if the process is still alive + if ( + exit_code := self._service_subprocess(max_wait_time=end - now, raise_on_timeout=False) + ) is not None: + log.info("Process exited", pid=self.pid, exit_code=exit_code, signal=sig.name) + return + + now = time.monotonic() + msg = "Process did not terminate in time" if sig != escalation_path[-1]: msg += "; escalating" @@ -539,6 +548,7 @@ def _service_subprocess(self, max_wait_time: float, raise_on_timeout: bool = Fal :param max_wait_time: Maximum time to block while waiting for events, in seconds. :param raise_on_timeout: If True, raise an exception if the subprocess does not exit within the timeout. + :returns: The process exit code, or None if it's still alive """ events = self.selector.select(timeout=max_wait_time) for key, _ in events: @@ -559,9 +569,9 @@ def _service_subprocess(self, max_wait_time: float, raise_on_timeout: bool = Fal key.fileobj.close() # type: ignore[union-attr] # Check if the subprocess has exited - self._check_subprocess_exit(raise_on_timeout=raise_on_timeout) + return self._check_subprocess_exit(raise_on_timeout=raise_on_timeout) - def _check_subprocess_exit(self, raise_on_timeout: bool = False): + def _check_subprocess_exit(self, raise_on_timeout: bool = False) -> int | None: """Check if the subprocess has exited.""" if self._exit_code is None: try: @@ -570,7 +580,7 @@ def _check_subprocess_exit(self, raise_on_timeout: bool = False): except psutil.TimeoutExpired: if raise_on_timeout: raise - pass + return self._exit_code def _send_heartbeat_if_needed(self): """Send a heartbeat to the client if heartbeat interval has passed.""" diff --git a/task_sdk/tests/execution_time/test_supervisor.py b/task_sdk/tests/execution_time/test_supervisor.py index 96506a48bda77..cb43904f17262 100644 --- a/task_sdk/tests/execution_time/test_supervisor.py +++ b/task_sdk/tests/execution_time/test_supervisor.py @@ -573,152 +573,128 @@ def watched_subprocess(self, mocker, mock_process): proc.selector = mock_selector return proc + def test_kill_process_already_exited(self, watched_subprocess, mock_process): + """Test behavior when the process has already exited.""" + mock_process.wait.side_effect = psutil.NoSuchProcess(pid=1234) + + watched_subprocess.kill(signal.SIGINT, force=True) + + mock_process.send_signal.assert_called_once_with(signal.SIGINT) + mock_process.wait.assert_called_once() + assert watched_subprocess._exit_code == -1 + + def test_kill_process_custom_signal(self, 
watched_subprocess, mock_process): + """Test that the process is killed with the correct signal.""" + mock_process.wait.return_value = 0 + + signal_to_send = signal.SIGUSR1 + watched_subprocess.kill(signal_to_send, force=False) + + mock_process.send_signal.assert_called_once_with(signal_to_send) + mock_process.wait.assert_called_once_with(timeout=0) + @pytest.mark.parametrize( - ["signal_to_send", "wait_side_effect", "expected_signals"], + ["signal_to_send", "exit_after"], [ pytest.param( signal.SIGINT, - [0], - [signal.SIGINT], + signal.SIGINT, id="SIGINT-success-without-escalation", ), pytest.param( signal.SIGINT, - [psutil.TimeoutExpired(0.1), 0], - [signal.SIGINT, signal.SIGTERM], + signal.SIGTERM, id="SIGINT-escalates-to-SIGTERM", ), pytest.param( signal.SIGINT, - [ - psutil.TimeoutExpired(0.1), # SIGINT times out - psutil.TimeoutExpired(0.1), # SIGTERM times out - 0, # SIGKILL succeeds - ], - [signal.SIGINT, signal.SIGTERM, signal.SIGKILL], + None, id="SIGINT-escalates-to-SIGTERM-then-SIGKILL", ), pytest.param( signal.SIGTERM, - [ - psutil.TimeoutExpired(0.1), # SIGTERM times out - 0, # SIGKILL succeeds - ], - [signal.SIGTERM, signal.SIGKILL], + None, id="SIGTERM-escalates-to-SIGKILL", ), pytest.param( signal.SIGKILL, - [0], - [signal.SIGKILL], + None, id="SIGKILL-success-without-escalation", ), ], ) - def test_force_kill_escalation( - self, - watched_subprocess, - mock_process, - mocker, - signal_to_send, - wait_side_effect, - expected_signals, - captured_logs, - ): - """Test escalation path for SIGINT, SIGTERM, and SIGKILL when force=True.""" - # Mock the process wait method to return the exit code or raise an exception - mock_process.wait.side_effect = wait_side_effect - - watched_subprocess.kill(signal_to_send=signal_to_send, escalation_delay=0.1, force=True) - - # Check that the correct signals were sent - mock_process.send_signal.assert_has_calls([mocker.call(sig) for sig in expected_signals]) - - # Check that the process was waited on for each signal - mock_process.wait.assert_has_calls([mocker.call(timeout=0)] * len(expected_signals)) - - ## Validate log messages - # If escalation occurred, we should see a warning log for each signal sent - if len(expected_signals) > 1: - assert { - "event": "Process did not terminate in time; escalating", - "level": "warning", - "logger": "supervisor", - "pid": 12345, - "signal": expected_signals[-2].name, - "timestamp": mocker.ANY, - } in captured_logs + def test_kill_escalation_path(self, signal_to_send, exit_after, mocker, captured_logs, monkeypatch): + def subprocess_main(): + import signal + + def _handler(sig, frame): + print(f"Signal {sig} received", file=sys.stderr) + if exit_after == sig: + sleep(0.1) + exit(sig) + sleep(5) + print("Should not get here") + + signal.signal(signal.SIGINT, _handler) + signal.signal(signal.SIGTERM, _handler) + try: + sys.stdin.readline() + print("Ready") + sleep(10) + except Exception as e: + print(e) + # Shouldn't get here + exit(5) - # Regardless of escalation, we should see an info log for the final signal sent - assert { - "event": "Process exited", - "level": "info", - "logger": "supervisor", - "pid": 12345, - "signal": expected_signals[-1].name, - "exit_code": 0, - "timestamp": mocker.ANY, - } in captured_logs + ti_id = uuid7() - # Validate `selector.select` calls - assert watched_subprocess.selector.select.call_count == len(expected_signals) - watched_subprocess.selector.select.assert_has_calls( - [mocker.call(timeout=0.1)] * len(expected_signals) + proc = WatchedSubprocess.start( + 
path=os.devnull,
+            ti=TaskInstance(id=ti_id, task_id="b", dag_id="c", run_id="d", try_number=1),
+            client=MagicMock(spec=sdk_client.Client),
+            target=subprocess_main,
+        )
+        # Ensure we get one normal run, to give the proc time to register its custom sighandler
+        proc._service_subprocess(max_wait_time=1)
+        proc.kill(signal_to_send=signal_to_send, escalation_delay=0.5, force=True)

         # Wait for the subprocess to finish
+        assert proc.wait() == (exit_after or -signal.SIGKILL)
+        exit_after = exit_after or signal.SIGKILL
+
+        logs = [{"event": m["event"], "chan": m.get("chan"), "logger": m["logger"]} for m in captured_logs]
+        expected_logs = [
+            {"chan": "stdout", "event": "Ready", "logger": "task"},
+        ]
+        # Work out what logs we expect to see
+        if signal_to_send == signal.SIGINT:
+            expected_logs.append({"chan": "stderr", "event": "Signal 2 received", "logger": "task"})
+        if signal_to_send == signal.SIGTERM or (
+            signal_to_send == signal.SIGINT and exit_after != signal.SIGINT
+        ):
+            if signal_to_send == signal.SIGINT:
+                expected_logs.append(
+                    {
+                        "chan": None,
+                        "event": "Process did not terminate in time; escalating",
+                        "logger": "supervisor",
+                    }
+                )
+            expected_logs.append({"chan": "stderr", "event": "Signal 15 received", "logger": "task"})
+        if exit_after == signal.SIGKILL:
+            if signal_to_send in {signal.SIGINT, signal.SIGTERM}:
+                expected_logs.append(
+                    {
+                        "chan": None,
+                        "event": "Process did not terminate in time; escalating",
+                        "logger": "supervisor",
+                    }
+                )
+            # expected_logs.append({"chan": "stderr", "event": "Signal 9 received", "logger": "task"})
+            ...
-        watched_subprocess.kill(signal.SIGINT, escalation_delay=0.1, force=True)
-
-        # Validate selector interactions
-        assert watched_subprocess.selector.select.call_count == 3
-        mock_key.data.assert_has_calls([mocker.call(mock_key.fileobj), mocker.call(mock_key.fileobj)])
-
-        # Validate signal escalation
-        mock_process.send_signal.assert_has_calls(
-            [mocker.call(signal.SIGINT), mocker.call(signal.SIGTERM), mocker.call(signal.SIGKILL)]
-        )
-
-    def test_kill_process_already_exited(self, watched_subprocess, mock_process):
-        """Test behavior when the process has already exited."""
-        mock_process.wait.side_effect = psutil.NoSuchProcess(pid=1234)
-
-        watched_subprocess.kill(signal.SIGINT, force=True)
-
-        mock_process.send_signal.assert_called_once_with(signal.SIGINT)
-        mock_process.wait.assert_called_once()
-        assert watched_subprocess._exit_code == -1
-
-    def test_kill_process_custom_signal(self, watched_subprocess, mock_process):
-        """Test that the process is killed with the correct signal."""
-        mock_process.wait.return_value = 0
-
-        signal_to_send = signal.SIGUSR1
-        watched_subprocess.kill(signal_to_send, force=False)
-
-        mock_process.send_signal.assert_called_once_with(signal_to_send)
-        mock_process.wait.assert_called_once_with(timeout=0)
+        expected_logs.extend(({"chan": None, "event": "Process exited", "logger": "supervisor"},))
+        assert logs == expected_logs

     def test_service_subprocess(self, watched_subprocess, mock_process, mocker):
         """Test `_service_subprocess` processes selector events and handles subprocess exit."""

From 82725e176d69127de3745be7bb691baf09c37d76 Mon Sep 17 00:00:00 2001
From: Ash Berlin-Taylor
Date: Sat, 7 Dec 2024 17:31:50 +0000
Subject: [PATCH 24/25] Ensure that we don't try sending any more heartbeat
 messages once the process (#44767)

has exited.

We noticed sometimes in CI that we would get 3 requests made, which
"shouldn't" happen: once it gets the 4xx error to the heartbeat, it is
meant to kill the task process.

The cause of this was mostly an artifact of the short heartbeat
interval we used in the tests, and how we poll for the subprocess exit
code. I don't think it could have happened in practice (and it wouldn't
affect anything if it did) but I've made it more robust anyway.
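Condensed, the change below gates the heartbeat on subprocess liveness. A small standalone sketch of the behaviour, with callables standing in for the supervisor's own methods (the real code is in the diff that follows):

    from typing import Callable, Optional

    def monitor_once(
        service_subprocess: Callable[[float], Optional[int]],
        send_heartbeat: Callable[[], None],
        max_wait_time: float,
    ) -> Optional[int]:
        # service_subprocess() returns the exit code, or None while the
        # child is still running.
        exit_code = service_subprocess(max_wait_time)
        if exit_code is None:
            # Child alive: keep the server informed.
            send_heartbeat()
        # Once the child has exited we are only draining its remaining log
        # output, so no further heartbeats are sent.
        return exit_code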
---
 .../airflow/sdk/execution_time/supervisor.py  |  9 ++++---
 .../tests/execution_time/test_supervisor.py   | 20 ++++++++++---------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/task_sdk/src/airflow/sdk/execution_time/supervisor.py b/task_sdk/src/airflow/sdk/execution_time/supervisor.py
index 1b2a7ba75773c..9b4933de8080e 100644
--- a/task_sdk/src/airflow/sdk/execution_time/supervisor.py
+++ b/task_sdk/src/airflow/sdk/execution_time/supervisor.py
@@ -518,11 +518,14 @@ def _monitor_subprocess(self):
             )
             # Block until events are ready or the timeout is reached
             # This listens for activity (e.g., subprocess output) on registered file objects
-            self._service_subprocess(max_wait_time=max_wait_time)
+            alive = self._service_subprocess(max_wait_time=max_wait_time) is None

-            self._send_heartbeat_if_needed()
+            if alive:
+                # We don't need to heartbeat if the process has shut down, as we are just
+                # finishing off reading the logs
+                self._send_heartbeat_if_needed()

-            self._handle_task_overtime_if_needed()
+                self._handle_task_overtime_if_needed()

     def _handle_task_overtime_if_needed(self):
         """Handle termination of auxiliary processes if the task exceeds the configured overtime."""
diff --git a/task_sdk/tests/execution_time/test_supervisor.py b/task_sdk/tests/execution_time/test_supervisor.py
index cb43904f17262..e51f418dad4df 100644
--- a/task_sdk/tests/execution_time/test_supervisor.py
+++ b/task_sdk/tests/execution_time/test_supervisor.py
@@ -349,11 +349,14 @@ def test_state_conflict_on_heartbeat(self, captured_logs, monkeypatch, mocker):
         """
         import airflow.sdk.execution_time.supervisor

-        monkeypatch.setattr(airflow.sdk.execution_time.supervisor, "MIN_HEARTBEAT_INTERVAL", 0.1)
+        # Heartbeat every time around the loop
+        monkeypatch.setattr(airflow.sdk.execution_time.supervisor, "MIN_HEARTBEAT_INTERVAL", 0.0)

         def subprocess_main():
             sys.stdin.readline()
             sleep(5)
+            # Shouldn't get here
+            exit(5)

         ti_id = uuid7()

@@ -389,23 +392,22 @@ def handle_request(request: httpx.Request) -> httpx.Response:

         # Wait for the subprocess to finish -- it should have been terminated
         assert proc.wait() == -signal.SIGTERM

-        count_request = request_count["count"]
+        assert request_count["count"] == 2
         # Verify the number of requests made
-        assert count_request >= 2

         assert captured_logs == [
             {
                 "detail": {
                     "reason": "not_running",
                     "message": "TI is no longer in the running state and task should terminate",
                     "current_state": "success",
                 },
                 "event": "Server indicated the task shouldn't be running anymore",
                 "level": "error",
                 "status_code": 409,
                 "logger": "supervisor",
                 "timestamp": mocker.ANY,
             }
-        ] * (count_request - 1)
+        ]

     @pytest.mark.parametrize("captured_logs", [logging.WARNING], indirect=True)
     def test_heartbeat_failures_handling(self, monkeypatch, mocker, captured_logs, time_machine):

From 7f42a77ca6096c147983f8a8bae7db362821e6fb Mon Sep 17 00:00:00 2001
From: Jens Scheffler <95105677+jscheffl@users.noreply.github.com>
Date: Sat, 7 Dec 2024 18:38:38 +0100
Subject: [PATCH 25/25] Remove Provider Deprecations in Apprise (#44764)

* Remove Provider Deprecations in Apprise

* Typo
---
 .../airflow/providers/apprise/CHANGELOG.rst   | 14 ++++++
 .../providers/apprise/hooks/apprise.py        | 12 +------
 .../apprise/notifications/apprise.py          | 32 ++-----------------
 providers/tests/apprise/hooks/test_apprise.py |  4 +--
 .../apprise/notifications/test_apprise.py     | 31 
++++-------------- 5 files changed, 26 insertions(+), 67 deletions(-) diff --git a/providers/src/airflow/providers/apprise/CHANGELOG.rst b/providers/src/airflow/providers/apprise/CHANGELOG.rst index 67be050bb3eaf..830ab8d0f1f35 100644 --- a/providers/src/airflow/providers/apprise/CHANGELOG.rst +++ b/providers/src/airflow/providers/apprise/CHANGELOG.rst @@ -27,6 +27,20 @@ Changelog --------- +main +.... + +.. warning:: + All deprecated classes, parameters and features have been removed from the {provider_name} provider package. + The following breaking changes were introduced: + + * Hooks + * Parameter ``tag`` cannot be None. It is not set to MATCH_ALL_TAG as default. + * Notifications + * Parameter ``notify_type`` cannot be None. It is not set to NotifyType.INFO as default. + * Parameter ``body_format`` cannot be None. It is not set to NotifyFormat.TEXT as default. + * Parameter ``tag`` cannot be None. It is not set to MATCH_ALL_TAG as default. + 1.4.1 ..... diff --git a/providers/src/airflow/providers/apprise/hooks/apprise.py b/providers/src/airflow/providers/apprise/hooks/apprise.py index 4904e5100c64e..4fe373281f6c7 100644 --- a/providers/src/airflow/providers/apprise/hooks/apprise.py +++ b/providers/src/airflow/providers/apprise/hooks/apprise.py @@ -18,14 +18,12 @@ from __future__ import annotations import json -import warnings from collections.abc import Iterable from typing import TYPE_CHECKING, Any import apprise from apprise import AppriseConfig, NotifyFormat, NotifyType -from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.hooks.base import BaseHook if TYPE_CHECKING: @@ -77,7 +75,7 @@ def notify( title: str | None = None, notify_type: NotifyType = NotifyType.INFO, body_format: NotifyFormat = NotifyFormat.TEXT, - tag: str | Iterable[str] | None = None, + tag: str | Iterable[str] = "all", attach: AppriseAttachment | None = None, interpret_escapes: bool | None = None, config: AppriseConfig | None = None, @@ -97,14 +95,6 @@ def notify( sequences such as \n and \r to their respective ascii new-line and carriage return characters :param config: Specify one or more configuration """ - if tag is None: - warnings.warn( - "`tag` cannot be None. 
diff --git a/providers/src/airflow/providers/apprise/notifications/apprise.py b/providers/src/airflow/providers/apprise/notifications/apprise.py
index ddbb4ac9e1b73..a1fe7e3c4be17 100644
--- a/providers/src/airflow/providers/apprise/notifications/apprise.py
+++ b/providers/src/airflow/providers/apprise/notifications/apprise.py
@@ -17,13 +17,11 @@

 from __future__ import annotations

-import warnings
 from collections.abc import Iterable
 from functools import cached_property

 from apprise import AppriseConfig, NotifyFormat, NotifyType

-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.notifications.basenotifier import BaseNotifier
 from airflow.providers.apprise.hooks.apprise import AppriseHook

@@ -53,38 +51,14 @@ def __init__(
         *,
         body: str,
         title: str | None = None,
-        notify_type: NotifyType | None = None,
-        body_format: NotifyFormat | None = None,
-        tag: str | Iterable[str] | None = None,
+        notify_type: NotifyType = NotifyType.INFO,
+        body_format: NotifyFormat = NotifyFormat.TEXT,
+        tag: str | Iterable[str] = "all",
         attach: str | None = None,
         interpret_escapes: bool | None = None,
         config: AppriseConfig | None = None,
         apprise_conn_id: str = AppriseHook.default_conn_name,
     ):
-        if tag is None:
-            warnings.warn(
-                "`tag` cannot be None. Assign it to be MATCH_ALL_TAG",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            tag = "all"
-
-        if notify_type is None:
-            warnings.warn(
-                "`notify_type` cannot be None. Assign it to be NotifyType.INFO",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            notify_type = NotifyType.INFO
-
-        if body_format is None:
-            warnings.warn(
-                "`body_format` cannot be None. Assign it to be NotifyFormat.TEXT",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            body_format = NotifyFormat.TEXT
-
         super().__init__()
         self.apprise_conn_id = apprise_conn_id
         self.body = body
diff --git a/providers/tests/apprise/hooks/test_apprise.py b/providers/tests/apprise/hooks/test_apprise.py
index b7be77f82934c..80b2256e3773d 100644
--- a/providers/tests/apprise/hooks/test_apprise.py
+++ b/providers/tests/apprise/hooks/test_apprise.py
@@ -24,7 +24,6 @@
 import pytest
 from apprise import NotifyFormat, NotifyType

-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.models import Connection
 from airflow.providers.apprise.hooks.apprise import AppriseHook

@@ -114,8 +113,7 @@ def test_notify(self, connection):
         apprise_obj.add = MagicMock()
         with patch.object(apprise, "Apprise", return_value=apprise_obj):
             hook = AppriseHook()
-            with pytest.warns(AirflowProviderDeprecationWarning):
-                hook.notify(body="test")
+            hook.notify(body="test")

         apprise_obj.notify.assert_called_once_with(
             body="test",
diff --git a/providers/tests/apprise/notifications/test_apprise.py b/providers/tests/apprise/notifications/test_apprise.py
index 77e2e12d31c70..b95d49459c233 100644
--- a/providers/tests/apprise/notifications/test_apprise.py
+++ b/providers/tests/apprise/notifications/test_apprise.py
@@ -22,7 +22,6 @@
 import pytest
 from apprise import NotifyFormat, NotifyType

-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.operators.empty import EmptyOperator
 from airflow.providers.apprise.notifications.apprise import (
     AppriseNotifier,
@@ -37,8 +36,7 @@ class TestAppriseNotifier:
     def test_notifier(self, mock_apprise_hook, dag_maker):
         with dag_maker("test_notifier") as dag:
             EmptyOperator(task_id="task1")
-        with pytest.warns(AirflowProviderDeprecationWarning):
-            notifier = send_apprise_notification(body="DISK at 99%", notify_type=NotifyType.FAILURE)
+        notifier = send_apprise_notification(body="DISK at 99%", notify_type=NotifyType.FAILURE)
         notifier({"dag": dag})
         mock_apprise_hook.return_value.notify.assert_called_once_with(
             body="DISK at 99%",
@@ -55,8 +53,7 @@ def test_notifier(self, mock_apprise_hook, dag_maker):
     def test_notifier_with_notifier_class(self, mock_apprise_hook, dag_maker):
         with dag_maker("test_notifier") as dag:
             EmptyOperator(task_id="task1")
-        with pytest.warns(AirflowProviderDeprecationWarning):
-            notifier = AppriseNotifier(body="DISK at 99%", notify_type=NotifyType.FAILURE)
+        notifier = AppriseNotifier(body="DISK at 99%", notify_type=NotifyType.FAILURE)
         notifier({"dag": dag})
         mock_apprise_hook.return_value.notify.assert_called_once_with(
             body="DISK at 99%",
@@ -74,12 +71,11 @@ def test_notifier_templated(self, mock_apprise_hook, dag_maker):
         with dag_maker("test_notifier") as dag:
             EmptyOperator(task_id="task1")

-        with pytest.warns(AirflowProviderDeprecationWarning):
-            notifier = AppriseNotifier(
-                notify_type=NotifyType.FAILURE,
-                title="DISK at 99% {{dag.dag_id}}",
-                body="System can crash soon {{dag.dag_id}}",
-            )
+        notifier = AppriseNotifier(
+            notify_type=NotifyType.FAILURE,
+            title="DISK at 99% {{dag.dag_id}}",
+            body="System can crash soon {{dag.dag_id}}",
+        )
         context = {"dag": dag}
         notifier(context)
         mock_apprise_hook.return_value.notify.assert_called_once_with(
@@ -92,16 +88,3 @@ def test_notifier_templated(self, mock_apprise_hook, dag_maker):
             interpret_escapes=None,
             config=None,
         )
-
-    @mock.patch("airflow.providers.apprise.notifications.apprise.AppriseHook")
-    def test_apprise_deprecation_warnning(self, mock_apprise_hook):
-        with pytest.warns(AirflowProviderDeprecationWarning) as record:
-            AppriseNotifier(
-                title="DISK at 99% {{dag.dag_id}}",
-                body="System can crash soon {{dag.dag_id}}",
-            )
-        assert len(record) == 3
-
-        assert record[0].message.args[0] == "`tag` cannot be None. Assign it to be MATCH_ALL_TAG"
-        assert record[1].message.args[0] == "`notify_type` cannot be None. Assign it to be NotifyType.INFO"
-        assert record[2].message.args[0] == "`body_format` cannot be None. Assign it to be NotifyFormat.TEXT"
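To make the upgrade path past these removals concrete, here is a hedged sketch of calling the notifier afterwards; the alert text, title, and template variable below are illustrative assumptions, not part of this patch.

from apprise import NotifyFormat, NotifyType

from airflow.providers.apprise.notifications.apprise import send_apprise_notification

# notify_type, body_format and tag may now simply be omitted: they default to
# NotifyType.INFO, NotifyFormat.TEXT and "all" (MATCH_ALL_TAG) instead of
# accepting None and emitting an AirflowProviderDeprecationWarning.
notify_on_failure = send_apprise_notification(
    body="DISK at 99% on {{ ti.hostname }}",  # illustrative alert text
    title="Disk space alert",  # illustrative title
    notify_type=NotifyType.FAILURE,
    body_format=NotifyFormat.TEXT,
)

As the tests above exercise send_apprise_notification and AppriseNotifier interchangeably, the resulting notifier object can be wired to callbacks such as on_failure_callback; callers that previously passed tag=None, notify_type=None or body_format=None explicitly should now omit those arguments instead.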