diff --git a/airflow-core/docs/extra-packages-ref.rst b/airflow-core/docs/extra-packages-ref.rst index 9c360209ac22c..5aae73989e024 100644 --- a/airflow-core/docs/extra-packages-ref.rst +++ b/airflow-core/docs/extra-packages-ref.rst @@ -106,6 +106,8 @@ python dependencies for the provided package. +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | pandas | ``pip install 'apache-airflow[pandas]'`` | Install Pandas library compatible with Airflow | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ +| polars | ``pip install 'apache-airflow[polars]'`` | Polars DataFrame support for SQL hooks (via the common.sql provider) | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | rabbitmq | ``pip install 'apache-airflow[rabbitmq]'`` | RabbitMQ support as a Celery backend | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | sentry | ``pip install 'apache-airflow[sentry]'`` | Sentry service for application logging and monitoring | diff --git a/airflow-core/pyproject.toml b/airflow-core/pyproject.toml index 336fb8966c6bb..4b7f5aecb02f8 100644 --- a/airflow-core/pyproject.toml +++ b/airflow-core/pyproject.toml @@ -138,7 +138,7 @@ dependencies = [ # pre-installed providers "apache-airflow-providers-common-compat>=1.6.0", "apache-airflow-providers-common-io>=1.5.3", - "apache-airflow-providers-common-sql>=1.25.0", + "apache-airflow-providers-common-sql>=1.26.0", "apache-airflow-providers-smtp>=2.0.2", "apache-airflow-providers-standard>=0.4.0", ] @@ -237,7 +237,7 @@ dev = [ "apache-airflow-task-sdk", # TODO(potiuk): eventually we do not want any providers nor apache-airflow extras to be needed for # 
airflow-core tests - "apache-airflow[pandas]", + "apache-airflow[pandas,polars]", "apache-airflow-providers-amazon", "apache-airflow-providers-celery", "apache-airflow-providers-cncf-kubernetes", diff --git a/providers/apache/drill/README.rst b/providers/apache/drill/README.rst index 14fae393aa758..5dfed07f21942 100644 --- a/providers/apache/drill/README.rst +++ b/providers/apache/drill/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= =========================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``sqlalchemy-drill`` ``>=1.1.0,!=1.1.6,!=1.1.7`` ======================================= =========================== diff --git a/providers/apache/drill/pyproject.toml b/providers/apache/drill/pyproject.toml index 284eaf3ca2c56..126c479b88bd9 100644 --- a/providers/apache/drill/pyproject.toml +++ b/providers/apache/drill/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", # Workaround until we get https://github.com/JohnOmernik/sqlalchemy-drill/issues/94 fixed. 
"sqlalchemy-drill>=1.1.0,!=1.1.6,!=1.1.7", ] @@ -70,7 +70,7 @@ dev = [ "apache-airflow-devel-common", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) - "apache-airflow-providers-common-sql[pandas]", + "apache-airflow-providers-common-sql[pandas,polars]", ] # To build docs: diff --git a/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py b/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py index 0e7f9c6994129..920be37a96387 100644 --- a/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py +++ b/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py @@ -90,13 +90,13 @@ def test_get_records(self): assert self.cur.close.call_count == 1 self.cur.execute.assert_called_once_with(statement) - def test_get_pandas_df(self): + def test_get_df_pandas(self): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] self.cur.description = [(column,)] self.cur.fetchall.return_value = result_sets - df = self.db_hook().get_pandas_df(statement) + df = self.db_hook().get_df(statement, df_type="pandas") assert column == df.columns[0] for i, item in enumerate(result_sets): @@ -104,3 +104,19 @@ def test_get_pandas_df(self): assert self.conn.close.call_count == 1 assert self.cur.close.call_count == 1 self.cur.execute.assert_called_once_with(statement) + + def test_get_df_polars(self): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + self.cur.execute.return_value = mock_execute + df = self.db_hook().get_df(statement, df_type="polars") + + self.cur.execute.assert_called_once_with(statement) + mock_execute.fetchall.assert_called_once_with() + assert column == df.columns[0] + assert result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == 
df.row(1)[0] diff --git a/providers/apache/druid/README.rst b/providers/apache/druid/README.rst index b5669178d65ea..951afb8990ed4 100644 --- a/providers/apache/druid/README.rst +++ b/providers/apache/druid/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``pydruid`` ``>=0.4.1`` ======================================= ================== diff --git a/providers/apache/druid/pyproject.toml b/providers/apache/druid/pyproject.toml index 43f1a4898a34a..87cedc07f2ae2 100644 --- a/providers/apache/druid/pyproject.toml +++ b/providers/apache/druid/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", "pydruid>=0.4.1", ] @@ -77,6 +77,7 @@ dev = [ "apache-airflow-providers-apache-hive", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) + "apache-airflow-providers-common-sql[polars]", ] # To build docs: diff --git a/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py b/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py index b035aac3d76c4..b95a04d004ded 100644 --- a/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py +++ b/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py @@ -452,13 +452,13 @@ def test_get_records(self): assert self.cur.close.call_count == 1 self.cur.execute.assert_called_once_with(statement) - def test_get_pandas_df(self): + def test_get_df_pandas(self): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] self.cur.description = 
[(column,)] self.cur.fetchall.return_value = result_sets - df = self.db_hook().get_pandas_df(statement) + df = self.db_hook().get_df(statement, df_type="pandas") assert column == df.columns[0] for i, item in enumerate(result_sets): @@ -466,3 +466,17 @@ def test_get_pandas_df(self): assert self.conn.close.call_count == 1 assert self.cur.close.call_count == 1 self.cur.execute.assert_called_once_with(statement) + + def test_get_df_polars(self): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + self.cur.execute.return_value = mock_execute + + df = self.db_hook().get_df(statement, df_type="polars") + assert column == df.columns[0] + assert result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == df.row(1)[0] diff --git a/providers/apache/impala/README.rst b/providers/apache/impala/README.rst index 6e672c00910ea..d0d6db02155fb 100644 --- a/providers/apache/impala/README.rst +++ b/providers/apache/impala/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``impyla`` ``>=0.18.0,<1.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``apache-airflow`` ``>=2.9.0`` ======================================= ================== diff --git a/providers/apache/impala/pyproject.toml b/providers/apache/impala/pyproject.toml index c09afe3d0c981..7e06348922e64 100644 --- a/providers/apache/impala/pyproject.toml +++ b/providers/apache/impala/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "impyla>=0.18.0,<1.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", "apache-airflow>=2.9.0", ] @@ 
-77,7 +77,7 @@ dev = [ "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) "kerberos>=1.3.0", - "apache-airflow-providers-common-sql[pandas]" + "apache-airflow-providers-common-sql[pandas,polars]" ] # To build docs: diff --git a/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py b/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py index 01a360c8771b0..3283e3092349b 100644 --- a/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py +++ b/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py @@ -107,13 +107,13 @@ def test_get_records(impala_hook_fixture): impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.assert_called_once_with(statement) -def test_get_pandas_df(impala_hook_fixture): +def test_get_df(impala_hook_fixture): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] impala_hook_fixture.get_conn.return_value.cursor.return_value.description = [(column,)] impala_hook_fixture.get_conn.return_value.cursor.return_value.fetchall.return_value = result_sets - df = impala_hook_fixture.get_pandas_df(statement) + df = impala_hook_fixture.get_df(statement, df_type="pandas") assert column == df.columns[0] @@ -121,3 +121,18 @@ def test_get_pandas_df(impala_hook_fixture): assert result_sets[1][0] == df.values.tolist()[1][0] impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.assert_called_once_with(statement) + + +def test_get_df_polars(impala_hook_fixture): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.return_value = mock_execute + + df = impala_hook_fixture.get_df(statement, df_type="polars") + assert column == 
df.columns[0] + assert result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == df.row(1)[0] diff --git a/providers/apache/pinot/README.rst b/providers/apache/pinot/README.rst index c488d18a69a65..40f49a007c7ae 100644 --- a/providers/apache/pinot/README.rst +++ b/providers/apache/pinot/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``pinotdb`` ``>=5.1.0`` ======================================= ================== diff --git a/providers/apache/pinot/pyproject.toml b/providers/apache/pinot/pyproject.toml index 2b970bfc839ee..dc8a69a95317a 100644 --- a/providers/apache/pinot/pyproject.toml +++ b/providers/apache/pinot/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", "pinotdb>=5.1.0", ] @@ -69,7 +69,7 @@ dev = [ "apache-airflow-devel-common", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) - "apache-airflow-providers-common-sql[pandas]" + "apache-airflow-providers-common-sql[pandas,polars]" ] # To build docs: diff --git a/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py b/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py index 8a433eace11fe..151379734da85 100644 --- a/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py +++ b/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py @@ -266,17 +266,30 @@ def test_get_first(self): self.cur.fetchone.return_value = result_sets[0] assert result_sets[0] == self.db_hook().get_first(statement) - def 
test_get_pandas_df(self): + def test_get_df_pandas(self): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] self.cur.description = [(column,)] self.cur.fetchall.return_value = result_sets - df = self.db_hook().get_pandas_df(statement) + df = self.db_hook().get_df(statement, df_type="pandas") assert column == df.columns[0] for i, item in enumerate(result_sets): assert item[0] == df.values.tolist()[i][0] + def test_get_df_polars(self): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = mock.MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + self.cur.execute.return_value = mock_execute + df = self.db_hook().get_df(statement, df_type="polars") + assert column == df.columns[0] + assert result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == df.row(1)[0] + class TestPinotAdminHookWithAuth: def setup_method(self): diff --git a/providers/elasticsearch/README.rst b/providers/elasticsearch/README.rst index 86ad9cc63dbee..64a1d54bad786 100644 --- a/providers/elasticsearch/README.rst +++ b/providers/elasticsearch/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``elasticsearch`` ``>=8.10,<9`` ======================================= ================== diff --git a/providers/elasticsearch/pyproject.toml b/providers/elasticsearch/pyproject.toml index 8d383f5705c8b..92f7ee7c773c3 100644 --- a/providers/elasticsearch/pyproject.toml +++ b/providers/elasticsearch/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + 
"apache-airflow-providers-common-sql>=1.26.0", "elasticsearch>=8.10,<9", ] @@ -69,7 +69,7 @@ dev = [ "apache-airflow-devel-common", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) - "apache-airflow-providers-common-sql[pandas]", + "apache-airflow-providers-common-sql[pandas,polars]", ] # To build docs: diff --git a/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py b/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py index b8f8fe25bb6c9..9848f6ae8d77a 100644 --- a/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py +++ b/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py @@ -166,9 +166,9 @@ def test_get_records(self): self.spy_agency.assert_spy_called(self.cur.close) self.spy_agency.assert_spy_called(self.cur.execute) - def test_get_pandas_df(self): + def test_get_df_pandas(self): statement = "SELECT * FROM hollywood.actors" - df = self.db_hook.get_pandas_df(statement) + df = self.db_hook.get_df(statement, df_type="pandas") assert list(df.columns) == ["index", "name", "firstname", "age"] assert df.values.tolist() == ROWS diff --git a/providers/sqlite/README.rst b/providers/sqlite/README.rst index 2e030b74ea12f..a7d161228c8ea 100644 --- a/providers/sqlite/README.rst +++ b/providers/sqlite/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ======================================= ================== Cross provider package dependencies diff --git a/providers/sqlite/pyproject.toml b/providers/sqlite/pyproject.toml index 25e9bb4a2ba22..35b3856ca4e37 100644 --- a/providers/sqlite/pyproject.toml +++ b/providers/sqlite/pyproject.toml @@ -58,7 +58,7 @@ requires-python = 
"~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", ] [dependency-groups] @@ -68,7 +68,7 @@ dev = [ "apache-airflow-devel-common", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) - "apache-airflow-providers-common-sql[pandas]", + "apache-airflow-providers-common-sql[pandas,polars]", ] # To build docs: diff --git a/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py b/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py index 9023882201529..5587bb5f4fdae 100644 --- a/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py +++ b/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py @@ -99,13 +99,13 @@ def test_get_records(self): self.cur.close.assert_called_once_with() self.cur.execute.assert_called_once_with(statement) - def test_get_pandas_df(self): + def test_get_df_pandas(self): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] self.cur.description = [(column,)] self.cur.fetchall.return_value = result_sets - df = self.db_hook.get_pandas_df(statement) + df = self.db_hook.get_df(statement, df_type="pandas") assert column == df.columns[0] @@ -114,6 +114,22 @@ def test_get_pandas_df(self): self.cur.execute.assert_called_once_with(statement) + def test_get_df_polars(self): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = mock.MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + self.cur.execute.return_value = mock_execute + df = self.db_hook.get_df(statement, df_type="polars") + + self.cur.execute.assert_called_once_with(statement) + mock_execute.fetchall.assert_called_once_with() + assert column == df.columns[0] + assert 
result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == df.row(1)[0] + def test_run_log(self): statement = "SQL" self.db_hook.run(statement) diff --git a/providers/vertica/README.rst b/providers/vertica/README.rst index 8e1c00e0df7a2..9fc9a29d3c2d3 100644 --- a/providers/vertica/README.rst +++ b/providers/vertica/README.rst @@ -54,7 +54,7 @@ Requirements PIP package Version required ======================================= ================== ``apache-airflow`` ``>=2.9.0`` -``apache-airflow-providers-common-sql`` ``>=1.20.0`` +``apache-airflow-providers-common-sql`` ``>=1.26.0`` ``vertica-python`` ``>=0.6.0`` ======================================= ================== diff --git a/providers/vertica/pyproject.toml b/providers/vertica/pyproject.toml index b74083ddefa33..ab4b1eb907562 100644 --- a/providers/vertica/pyproject.toml +++ b/providers/vertica/pyproject.toml @@ -58,7 +58,7 @@ requires-python = "~=3.9" # After you modify the dependencies, and rebuild your Breeze CI image with ``breeze ci-image build`` dependencies = [ "apache-airflow>=2.9.0", - "apache-airflow-providers-common-sql>=1.20.0", + "apache-airflow-providers-common-sql>=1.26.0", "vertica-python>=0.6.0", ] @@ -69,7 +69,7 @@ dev = [ "apache-airflow-devel-common", "apache-airflow-providers-common-sql", # Additional devel dependencies (do not remove this line and add extra development dependencies) - "apache-airflow-providers-common-sql[pandas]", + "apache-airflow-providers-common-sql[pandas,polars]", ] # To build docs: diff --git a/providers/vertica/tests/unit/vertica/hooks/test_vertica.py b/providers/vertica/tests/unit/vertica/hooks/test_vertica.py index e5ff2538ebeab..0063bf30534d2 100644 --- a/providers/vertica/tests/unit/vertica/hooks/test_vertica.py +++ b/providers/vertica/tests/unit/vertica/hooks/test_vertica.py @@ -169,15 +169,31 @@ def test_get_records(self): self.cur.close.assert_called_once_with() self.cur.execute.assert_called_once_with(statement) - def test_get_pandas_df(self): + def 
test_get_df_pandas(self): statement = "SQL" column = "col" result_sets = [("row1",), ("row2",)] self.cur.description = [(column,)] self.cur.fetchall.return_value = result_sets - df = self.db_hook.get_pandas_df(statement) + df = self.db_hook.get_df(statement, df_type="pandas") assert column == df.columns[0] assert result_sets[0][0] == df.values.tolist()[0][0] assert result_sets[1][0] == df.values.tolist()[1][0] + + def test_get_df_polars(self): + statement = "SQL" + column = "col" + result_sets = [("row1",), ("row2",)] + mock_execute = mock.MagicMock() + mock_execute.description = [(column, None, None, None, None, None, None)] + mock_execute.fetchall.return_value = result_sets + self.cur.execute.return_value = mock_execute + df = self.db_hook.get_df(statement, df_type="polars") + + self.cur.execute.assert_called_once_with(statement) + mock_execute.fetchall.assert_called_once_with() + assert column == df.columns[0] + assert result_sets[0][0] == df.row(0)[0] + assert result_sets[1][0] == df.row(1)[0] diff --git a/pyproject.toml b/pyproject.toml index 52122d3ccfba3..29ba1cd8ac629 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -377,7 +377,7 @@ packages = [] "apache-airflow-providers-zendesk>=4.9.0" ] "all" = [ - "apache-airflow[aiobotocore,apache-atlas,apache-webhdfs,async,cloudpickle,github-enterprise,google-auth,graphviz,kerberos,ldap,otel,pandas,rabbitmq,s3fs,sentry,statsd,uv]", + "apache-airflow[aiobotocore,apache-atlas,apache-webhdfs,async,cloudpickle,github-enterprise,google-auth,graphviz,kerberos,ldap,otel,pandas,polars,rabbitmq,s3fs,sentry,statsd,uv]", "apache-airflow-core[all]", "apache-airflow-providers-airbyte>=5.0.0", "apache-airflow-providers-alibaba>=3.0.0", @@ -504,10 +504,10 @@ packages = [] "python-ldap>=3.4.4", ] "pandas" = [ - # In pandas 2.2 minimal version of the sqlalchemy is 2.0 - # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies - # However Airflow not fully supports it yet: 
https://github.com/apache/airflow/issues/28723 - "pandas>=2.1.2,<2.3", + "apache-airflow-providers-common-sql[pandas]", +] +"polars" = [ + "apache-airflow-providers-common-sql[polars]", ] "rabbitmq" = [ "amqp>=5.2.0", @@ -781,7 +781,7 @@ testing = ["dev", "providers.tests", "tests_common", "tests", "system", "unit", ban-relative-imports = "all" # Ban certain modules from being imported at module level, instead requiring # that they're imported lazily (e.g., within a function definition). -banned-module-level-imports = ["numpy", "pandas"] +banned-module-level-imports = ["numpy", "pandas", "polars"] [tool.ruff.lint.flake8-tidy-imports.banned-api] # Direct import from the airflow package modules and constraints diff --git a/scripts/in_container/install_airflow_and_providers.py b/scripts/in_container/install_airflow_and_providers.py index ec1d6fbddb699..6a843647a9d7f 100755 --- a/scripts/in_container/install_airflow_and_providers.py +++ b/scripts/in_container/install_airflow_and_providers.py @@ -100,7 +100,15 @@ def find_provider_distributions(extension: str, selected_providers: list[str]) - for candidate in sorted(candidates): console.print(f" {candidate.as_posix()}") console.print() - return [candidate.as_posix() for candidate in candidates] + result = [] + for candidate in candidates: + # https://github.com/apache/airflow/pull/49339 + path_str = candidate.as_posix() + if "apache_airflow_providers_common_sql" in path_str: + console.print(f"[bright_blue]Adding [polars] extra to common.sql provider: {path_str}") + path_str += "[polars]" + result.append(path_str) + return result def calculate_constraints_location(