diff --git a/.dockerignore b/.dockerignore index d2a5d3e98c9e7..3ddf7344a430a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -48,11 +48,11 @@ !.dockerignore !RELEASE_NOTES.rst !LICENSE -!MANIFEST.in !NOTICE !.github !empty !Dockerfile +!hatch_build.py # This folder is for you if you want to add any packages to the docker context when you build your own # docker image. most of other files and any new folder you add will be excluded by default @@ -68,8 +68,6 @@ !.bash_completion.d # Setup/version configuration -!setup.cfg -!setup.py !pyproject.toml !manifests !generated diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a508c06f4fc44..d9c3c76e39f00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -193,7 +193,7 @@ jobs: # Push early BuildX cache to GitHub Registry in Apache repository, This cache does not wait for all the # tests to complete - it is run very early in the build process for "main" merges in order to refresh - # cache using the current constraints. This will speed up cache refresh in cases when setup.py + # cache using the current constraints. This will speed up cache refresh in cases when pyproject.toml # changes or in case of Dockerfile changes. Failure in this step is not a problem (at most it will # delay cache refresh. It does not attempt to upgrade to newer dependencies. # We only push CI cache as PROD cache usually does not gain as much from fresh cache because @@ -629,9 +629,9 @@ jobs: id: cache-doc-inventories with: path: ./docs/_inventory_cache/ - key: docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + key: docs-inventory-${{ hashFiles('pyproject.toml;') }} restore-keys: | - docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + docs-inventory-${{ hashFiles('pyproject.toml;') }} docs-inventory- - name: "Build docs" run: > @@ -693,9 +693,9 @@ jobs: id: cache-doc-inventories with: path: ./docs/_inventory_cache/ - key: docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + key: docs-inventory-${{ hashFiles('pyproject.toml;') }} restore-keys: | - docs-inventory-${{ hashFiles('setup.py','setup.cfg','pyproject.toml;') }} + docs-inventory-${{ hashFiles('pyproject.toml;') }} docs-inventory- - name: "Spellcheck docs" run: > @@ -881,8 +881,9 @@ jobs: path: old-airflow - name: "Prepare airflow package: wheel" run: | - pip install pip==23.3.2 wheel==0.36.2 gitpython==3.1.40 - python setup.py egg_info --tag-build ".dev0" bdist_wheel -d ../dist + pip install pip==23.3.2 hatch==1.9.1 + # TODO(potiuk) make sure dev0 version is used + hatch --tag-build ".dev0" -t wheel working-directory: ./old-airflow - name: > Install and verify all provider packages and airflow on @@ -2072,8 +2073,7 @@ jobs: path: ".build/.k8s-env" key: "\ k8s-env-${{steps.breeze.outputs.host-python-version}}-\ - ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','setup.cfg',\ - 'setup.py','pyproject.toml','generated/provider_dependencies.json') }}" + ${{ hashFiles('scripts/ci/kubernetes/k8s_requirements.txt','pyproject.toml') }}" - name: Run complete K8S tests ${{needs.build-info.outputs.kubernetes-combos-list-as-string}} run: breeze k8s run-complete-tests --run-in-parallel --upgrade env: diff --git a/.gitignore b/.gitignore index 32e202a43ea15..9a60bfed4f4c8 100644 --- a/.gitignore +++ b/.gitignore @@ -190,8 +190,6 @@ dmypy.json log.txt* # Provider-related ignores -/provider_packages/CHANGELOG.txt -/provider_packages/MANIFEST.in /airflow/providers/__init__.py # Docker context files diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3cabc586182aa..244f4a6393aa6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -325,13 +325,6 @@ repos: files: Dockerfile.*$ pass_filenames: true require_serial: true - - id: check-setup-order - name: Check order of dependencies in setup.cfg and setup.py - language: python - files: ^setup\.cfg$|^setup\.py$ - pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py - additional_dependencies: ['rich>=12.4.4'] - id: check-airflow-k8s-not-used name: Check airflow.kubernetes imports are not used language: python @@ -355,14 +348,6 @@ repos: exclude: ^airflow/kubernetes/|^airflow/providers/ entry: ./scripts/ci/pre_commit/pre_commit_check_cncf_k8s_used_for_k8s_executor_only.py additional_dependencies: ['rich>=12.4.4'] - - id: check-extra-packages-references - name: Checks setup extra packages - description: Checks if all the libraries in setup.py are listed in extra-packages-ref.rst file - language: python - files: ^setup\.py$|^docs/apache-airflow/extra-packages-ref\.rst$|^airflow/providers/.*/provider\.yaml$ - pass_filenames: false - entry: ./scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py - additional_dependencies: ['rich>=12.4.4'] - id: check-airflow-provider-compatibility name: Check compatibility of Providers with Airflow entry: ./scripts/ci/pre_commit/pre_commit_check_provider_airflow_compatibility.py @@ -392,19 +377,34 @@ repos: files: ^airflow/providers/.*/hooks/.*\.py$ additional_dependencies: ['rich>=12.4.4', 'pyyaml', 'packaging'] - id: update-providers-dependencies - name: Update cross-dependencies for providers packages + name: Update dependencies for provider packages entry: ./scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py language: python - files: ^airflow/providers/.*\.py$|^airflow/providers/.*/provider\.yaml$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ + files: ^airflow/providers/.*\.py$|^airflow/providers/.*/provider\.yaml$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$|^scripts/ci/pre_commit/pre_commit_update_providers_dependencies\.py$ pass_filenames: false - additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml'] + additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml', 'tomli'] + - id: check-extra-packages-references + name: Checks extra packages references + description: Checks if all the extras defined in pyproject.toml are listed in extra-packages-ref.rst file + language: python + files: ^docs/apache-airflow/extra-packages-ref\.rst$|^pyproject\.toml$ + pass_filenames: false + entry: ./scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py + additional_dependencies: ['rich>=12.4.4', 'tomli', 'tabulate'] + - id: check-pyproject-toml-order + name: Check order of dependencies in pyproject.toml + language: python + files: ^pyproject\.toml$ + pass_filenames: false + entry: ./scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py + additional_dependencies: ['rich>=12.4.4'] - id: update-extras name: Update extras in documentation entry: ./scripts/ci/pre_commit/pre_commit_insert_extras.py language: python files: ^setup\.py$|^CONTRIBUTING\.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$ pass_filenames: false - additional_dependencies: ['rich>=12.4.4'] + additional_dependencies: ['rich>=12.4.4', 'tomli'] - id: check-extras-order name: Check order of extras in Dockerfile entry: ./scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py diff --git a/BREEZE.rst b/BREEZE.rst index
7bcda450295ca..b3fd301d55a91 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -1570,7 +1570,7 @@ The CI image is built automatically as needed, however it can be rebuilt manuall Building the image first time pulls a pre-built version of images from the Docker Hub, which may take some time. But for subsequent source code changes, no wait time is expected. -However, changes to sensitive files like ``setup.py`` or ``Dockerfile.ci`` will trigger a rebuild +However, changes to sensitive files like ``pyproject.toml`` or ``Dockerfile.ci`` will trigger a rebuild that may take more time though it is highly optimized to only rebuild what is needed. Breeze has built in mechanism to check if your local image has not diverged too much from the @@ -2300,7 +2300,7 @@ These are all available flags of ``release-management add-back-references`` comm Generating constraints """""""""""""""""""""" -Whenever setup.py gets modified, the CI main job will re-generate constraint files. Those constraint +Whenever ``pyproject.toml`` gets modified, the CI main job will re-generate constraint files. Those constraint files are stored in separated orphan branches: ``constraints-main``, ``constraints-2-0``. Those are constraint files as described in detail in the @@ -2342,14 +2342,14 @@ These are all available flags of ``generate-constraints`` command: :width: 100% :alt: Breeze generate-constraints -In case someone modifies setup.py, the scheduled CI Tests automatically upgrades and +In case someone modifies ``pyproject.toml``, the scheduled CI Tests automatically upgrades and pushes changes to the constraint files, however you can also perform test run of this locally using the procedure described in the `Manually generating image cache and constraints `_ which utilises multiple processors on your local machine to generate such constraints faster. -This bumps the constraint files to latest versions and stores hash of setup.py. The generated constraint -and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff +This bumps the constraint files to latest versions and stores hash of ``pyproject.toml``. The generated constraint +and ``pyproject.toml`` hash files are stored in the ``files`` folder and while generating the constraints diff of changes vs the previous constraint files is printed. Updating constraints @@ -2698,18 +2698,18 @@ disappear when you exit Breeze shell. When you want to add dependencies permanently, then it depends what kind of dependency you add. -If you want to add core dependency that should always be installed - you need to add it to ``setup.cfg`` -to ``install_requires`` section. If you want to add it to one of the optional core extras, you should -add it in the extra definition in ``setup.py`` (you need to find out where it is defined). If you want -to add it to one of the providers, you need to add it to the ``provider.yaml`` file in the provider +If you want to add core dependency that should always be installed - you need to add it to ``pyproject.toml`` +to ``dependencies`` section. If you want to add it to one of the optional core extras, you should +add it in the extra definition in ``pyproject.toml`` (you need to find out where it is defined). 
+If you want to add it to one of the providers, you need to add it to the ``provider.yaml`` file in the provider directory - but remember that this should be followed by running pre-commit that will automatically update -the ``generated/provider_dependencies.json`` directory with the new dependencies: +the ``pyproject.toml`` with the new dependencies as the ``provider.yaml`` files are not used directly, they +are used to update ``pyproject.toml`` file: .. code-block:: bash pre-commit run update-providers-dependencies --all-files - You can also run the pre-commit by ``breeze static-checks --type update-providers-dependencies --all-files`` command - which provides autocomplete. diff --git a/CI.rst b/CI.rst index fdedb0cd99703..2353ed66830a2 100644 --- a/CI.rst +++ b/CI.rst @@ -617,7 +617,7 @@ those via corresponding command line flags passed to ``breeze shell`` command. | ``UPGRADE_TO_NEWER_DEPENDENCIES`` | false | false | false\* | Determines whether the build should | | | | | | attempt to upgrade Python base image and all | | | | | | PIP dependencies to latest ones matching | -| | | | | ``setup.py`` limits. This tries to replicate | +| | | | | ``pyproject.toml`` limits. Tries to replicate | | | | | | the situation of "fresh" user who just installs | | | | | | airflow and uses latest version of matching | | | | | | dependencies. By default we are using a | @@ -638,7 +638,7 @@ those via corresponding command line flags passed to ``breeze shell`` command. | | | | | | | | | | | Setting the value to random value is best way | | | | | | to assure that constraints are upgraded even if | -| | | | | there is no change to setup.py | +| | | | | there is no change to ``pyproject.toml`` | | | | | | | | | | | | This way our constraints are automatically | | | | | | tested and updated whenever new versions | diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 94a7e40d58126..80c5cca35ab7b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -841,29 +841,106 @@ Extras ------ There are a number of extras that can be specified when installing Airflow. Those -extras can be specified after the usual pip install - for example -``pip install -e .[ssh]``. For development purpose there is a ``devel`` extra that -installs all development dependencies. There is also ``devel_ci`` that installs -all dependencies needed in the CI environment. +extras can be specified after the usual pip install - for example ``pip install -e .[editable_ssh]`` for editable +installation. Note that there are two kinds of those extras - ``standard`` extras (used when you install +Airflow as a user) and ``editable`` extras (used when you install airflow in editable installation as a contributor). +There are also ``devel`` extras that allow you to install development tools and libraries (such as ``pytest`` and +``mypy``) and ``doc`` extras - that allow you to install tools needed to build the documentation. This is the full list of those extras: - ..
START EXTRAS HERE -aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, -apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.webhdfs, apprise, -arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, -cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, -deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, -druid, elasticsearch, exasol, fab, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, -ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, -mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, -pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, -webhdfs, winrm, yandex, zendesk - .. END EXTRAS HERE +Devel extras +............. + +The ``devel`` extras are not available in the released packages. They are only available when you install +Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools such as ``pytest`` and ``mypy`` for general purpose development and testing; also, +some providers have their own development-related extras that allow you to install tools necessary to run tests, +where the tools are specific for the provider. + + .. START DEVEL EXTRAS HERE +devel, devel_all, devel_all_dbs, devel_amazon, devel_azure, devel_breeze, devel_ci, devel_debuggers, +devel_deltalake, devel_devscripts, devel_duckdb, devel_hadoop, devel_iceberg, devel_mongo, +devel_mypy, devel_sentry, devel_static_checks, devel_tests + .. END DEVEL EXTRAS HERE + +Doc extras +........... + +The ``doc`` extras are not available in the released packages. They are only available when you install +Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +Airflow. They provide tools needed when you want to build Airflow documentation (note that you also need +``devel`` extras installed for airflow and providers in order to build documentation for airflow and +provider packages respectively). The ``doc`` extra is enough to build regular documentation, while +``doc_gen`` is needed to generate the ER diagram describing our database. + + .. START DOC EXTRAS HERE +doc, doc_gen + .. END DOC EXTRAS HERE + + +Editable extras +............... + +Those are extras that you use to install dependencies that are needed for optional Airflow features - +including when you want to develop providers. + +The editable extras are not available in the released packages. They are only available when you install +Airflow from sources in ``editable`` installation - i.e. one that you are usually using to contribute to +Airflow.
You can install any of those extras in your editable install when you want to test an optional +feature or provider of Airflow. When installing those extras in editable installation of Airflow, they +will only install provider dependencies, not the providers themselves - which allows you to iterate and +develop Airflow Providers within the checked out Airflow source tree - provider sources from the ``main`` +branch are directly used for provider tests and you can modify them and see the changes immediately. + + .. START EDITABLE EXTRAS HERE +editable_airbyte, editable_alibaba, editable_amazon, editable_apache_beam, +editable_apache_cassandra, editable_apache_drill, editable_apache_druid, editable_apache_flink, +editable_apache_hdfs, editable_apache_hive, editable_apache_impala, editable_apache_kafka, +editable_apache_kylin, editable_apache_livy, editable_apache_pig, editable_apache_pinot, +editable_apache_spark, editable_apprise, editable_arangodb, editable_asana, editable_atlassian_jira, +editable_celery, editable_cloudant, editable_cncf_kubernetes, editable_cohere, editable_common_io, +editable_common_sql, editable_databricks, editable_datadog, editable_dbt_cloud, editable_dingding, +editable_discord, editable_docker, editable_elasticsearch, editable_exasol, editable_fab, +editable_facebook, editable_ftp, editable_github, editable_google, editable_grpc, +editable_hashicorp, editable_http, editable_imap, editable_influxdb, editable_jdbc, +editable_jenkins, editable_microsoft_azure, editable_microsoft_mssql, editable_microsoft_psrp, +editable_microsoft_winrm, editable_mongo, editable_mysql, editable_neo4j, editable_odbc, +editable_openai, editable_openfaas, editable_openlineage, editable_opensearch, editable_opsgenie, +editable_oracle, editable_pagerduty, editable_papermill, editable_pgvector, editable_pinecone, +editable_postgres, editable_presto, editable_redis, editable_salesforce, editable_samba, +editable_segment, editable_sendgrid, editable_sftp, editable_singularity, editable_slack, +editable_smtp, editable_snowflake, editable_sqlite, editable_ssh, editable_tableau, +editable_tabular, editable_telegram, editable_trino, editable_vertica, editable_weaviate, +editable_yandex, editable_zendesk + .. END EDITABLE EXTRAS HERE + + +Standard extras +............... + +Those extras are available in released Airflow packages and are targeted to be used by Airflow users +to select features of Airflow they want to use. They might install additional providers or just install +dependencies that are necessary to enable the feature. + + ..
START STANDARD EXTRAS HERE +aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache_atlas, apache_beam, apache_cassandra, +apache_drill, apache_druid, apache_flink, apache_hdfs, apache_hive, apache_impala, apache_kafka, +apache_kylin, apache_livy, apache_pig, apache_pinot, apache_spark, apache_webhdfs, apprise, +arangodb, asana, async, atlas, atlassian_jira, aws, azure, cassandra, celery, cgroups, cloudant, +cncf_kubernetes, cohere, common_io, common_sql, crypto, databricks, datadog, dbt_cloud, +deprecated_api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp, +gcp_api, github, github_enterprise, google, google_auth, graphviz, grpc, hashicorp, hdfs, hive, +http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft_azure, +microsoft_mssql, microsoft_psrp, microsoft_winrm, mongo, mssql, mysql, neo4j, odbc, openai, +openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password, +pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, salesforce, samba, saml, +segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd, +tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk + .. END STANDARD EXTRAS HERE + Provider packages ----------------- @@ -885,29 +962,24 @@ of ``airflow\providers``. This file contains: * list of integrations, operators, hooks, sensors, transfers provided by the provider (useful for documentation generation) * list of connection types, extra-links, secret backends, auth backends, and logging handlers (useful to both register them as they are needed by Airflow and to include them in documentation automatically). +* and more ... If you want to add dependencies to the provider, you should add them to the corresponding ``provider.yaml`` and Airflow pre-commits and package generation commands will use them when preparing package information. In Airflow 1.10 all those providers were installed together within one single package and when you installed airflow locally, from sources, they were also installed. In Airflow 2.0, providers are separated out, -and not packaged together with the core, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment -variable to ``true``. - -In Breeze - which is a development environment, ``INSTALL_PROVIDERS_FROM_SOURCES`` variable is set to true, -but you can add ``--install-providers-from-sources=false`` flag to Breeze to install providers from PyPI instead of source files when -building the images. +and not packaged together with the core when you build "apache-airflow" package, however when you install +airflow project locally with ``pip install .`` or ``pip install -e .`` they are installed together. -One watch-out - providers are still always installed (or rather available) if you install airflow from -sources using ``-e`` (or ``--editable``) flag. In such case airflow is read directly from the sources -without copying airflow packages to the usual installation location, and since 'providers' folder is -in this airflow folder - the providers package is importable. +You should only update dependencies for the provider in the corresponding ``provider.yaml`` which is the +source of truth for all information about the provider. Some of the packages have cross-dependencies with other providers packages. 
This typically happens for transfer operators where operators use hooks from the other providers in case they are transferring -data between the providers. The list of dependencies is maintained (automatically with pre-commits) -in the ``generated/provider_dependencies.json``. Pre-commits are also used to generate dependencies. -The dependency list is automatically used during PyPI packages generation. +data between the providers. The list of dependencies is maintained (automatically with the +``update-providers-dependencies`` pre-commit) in the ``generated/provider_dependencies.json``. +The same pre-commit also updates the generated dependencies in ``pyproject.toml``. Cross-dependencies between provider packages are converted into extras - if you need functionality from the other provider package you can install it adding [extra] after the @@ -916,8 +988,9 @@ the other provider package you can install it adding [extra] after the transfer operators from Amazon ECS. If you add a new dependency between different providers packages, it will be detected automatically during -and pre-commit will generate new entry in ``generated/provider_dependencies.json`` so that -the package extra dependencies are properly handled when package is installed. +and pre-commit will generate new entry in ``generated/provider_dependencies.json`` and update +``pyproject.toml`` so that the package extra dependencies are properly handled when the package +is installed - when breeze is restarted, by your IDE, or by running ``pip install -e .``. Developing community managed provider packages ---------------------------------------------- @@ -927,27 +1000,26 @@ They are part of the same repository as Apache Airflow (we use ``monorepo`` appr parts of the system are developed in the same repository but then they are packaged and released separately). All the community-managed providers are in 'airflow/providers' folder and they are all sub-packages of 'airflow.providers' package. All the providers are available as ``apache-airflow-providers-`` -packages. +packages when installed by users, but when you contribute to providers you can work on airflow main +and install provider dependencies via ``editable`` extras - without having to manage and install providers +separately. You can easily run tests for the providers, and when you run airflow from the ``main`` +sources, all community providers are automatically available for you. The capabilities of the community-managed providers are the same as the third-party ones. When the providers are installed from PyPI, they provide the entry-point containing the metadata as described in the previous chapter. However when they are locally developed, together with Airflow, the mechanism of discovery of the providers is based on ``provider.yaml`` file that is placed in the top-folder of -the provider. Similarly as in case of the ``provider.yaml`` file is compliant with the -`json-schema specification `_. -Thanks to that mechanism, you can develop community managed providers in a seamless way directly from -Airflow sources, without preparing and releasing them as packages. This is achieved by: +the provider. The ``provider.yaml`` is the single source of truth for the provider metadata and that is +where you should add and remove dependencies for providers (followed by running the +``update-providers-dependencies`` pre-commit to synchronize the dependencies with ``pyproject.toml`` +of Airflow).
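For example, a typical dependency change in a provider boils down to editing its ``provider.yaml`` and re-running that pre-commit - a minimal sketch of the workflow (the ``<provider>`` path placeholder is hypothetical; the hook and generated file names are the ones mentioned above):

.. code-block:: bash

    # 1. add or change the requirement in airflow/providers/<provider>/provider.yaml
    # 2. regenerate the derived dependency files
    pre-commit run update-providers-dependencies --all-files
    # 3. review what changed before committing
    git diff generated/provider_dependencies.json pyproject.toml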
-* When Airflow is installed locally in editable mode (``pip install -e``) the provider packages installed - from PyPI are uninstalled and the provider discovery mechanism finds the providers in the Airflow - sources by searching for provider.yaml files. +The ``provider.yaml`` file is compliant with the schema that is available in +`json-schema specification `_. -* When you want to install Airflow from sources you can set ``INSTALL_PROVIDERS_FROM_SOURCES`` variable - to ``true`` and then the providers will not be installed from PyPI packages, but they will be installed - from local sources as part of the ``apache-airflow`` package, but additionally the ``provider.yaml`` files - are copied together with the sources, so that capabilities and names of the providers can be discovered. - This mode is especially useful when you are developing a new provider, that cannot be installed from - PyPI and you want to check if it installs cleanly. +Thanks to that mechanism, you can develop community managed providers in a seamless way directly from +Airflow sources, without preparing and releasing them as packages separately, which would be rather +complicated. Regardless if you plan to contribute your provider, when you are developing your own, custom providers, you can use the above functionality to make your development easier. You can add your provider @@ -955,6 +1027,7 @@ as a sub-folder of the ``airflow.providers`` package, add the ``provider.yaml`` in development mode - then capabilities of your provider will be discovered by airflow and you will see the provider among other providers in ``airflow providers`` command output. + Documentation for the community managed providers ------------------------------------------------- @@ -980,12 +1053,13 @@ You can see for example ``google`` provider which has very comprehensive documen * `Documentation `_ * `Example DAGs `_ -Part of the documentation are example dags. We are using the example dags for various purposes in -providers: +Part of the documentation are example dags (placed in the ``tests/system`` folder). The reason why +they are in ``tests/system`` is that we are using the example dags for various purposes: * showing real examples of how your provider classes (Operators/Sensors/Transfers) can be used * snippets of the examples are embedded in the documentation via ``exampleinclude::`` directive -* examples are executable as system tests +* examples are executable as system tests and some of our stakeholders run them regularly to + check if ``system`` level integration is still working, before releasing a new version of the provider. Testing the community managed providers --------------------------------------- @@ -1022,8 +1096,7 @@ be open to allow several different libraries with the same requirements to be in The problem is that Apache Airflow is a bit of both - application to install and library to be used when you are developing your own operators and DAGs. -This - seemingly unsolvable - puzzle is solved by having pinned constraints files. Those are available -as of airflow 1.10.10 and further improved with 1.10.12 (moved to separate orphan branches) +This - seemingly unsolvable - puzzle is solved by having pinned constraints files.
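In practice, using the pinned constraints simply means passing a constraints file to ``pip`` when installing Airflow - a minimal sketch using the constraints URL pattern shown elsewhere in this document (adjust the branch and Python version to your environment):

.. code-block:: bash

    pip install -e ".[devel]" \
        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt"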
Pinned constraint files ======================= @@ -1112,7 +1185,7 @@ If you want to update just airflow dependencies, without paying attention to pro The ``constraints-.txt`` and ``constraints-no-providers-.txt`` -will be automatically regenerated by CI job every time after the ``setup.py`` is updated and pushed +will be automatically regenerated by CI job every time after the ``pyproject.toml`` is updated and pushed if the tests are successful. diff --git a/CONTRIBUTORS_QUICK_START.rst b/CONTRIBUTORS_QUICK_START.rst index f56a9e6752411..34d96494d9864 100644 --- a/CONTRIBUTORS_QUICK_START.rst +++ b/CONTRIBUTORS_QUICK_START.rst @@ -151,14 +151,13 @@ Pyenv and setting up virtual-env basic system-level dependencies on Debian/Ubuntu-like system. You will have to adapt it to install similar packages if your operating system is MacOS or another flavour of Linux - .. code-block:: bash sudo apt install openssl sqlite default-libmysqlclient-dev libmysqlclient-dev postgresql If you want to install all airflow providers, more system dependencies might be needed. For example on Debian/Ubuntu -like system, this command will install all necessary dependencies that should be installed when you use ``devel_all`` -extra while installing airflow. +like system, this command will install all necessary dependencies that should be installed when you use +``devel_all`` extra while installing airflow. .. code-block:: bash diff --git a/Dockerfile b/Dockerfile index c1ed35a61ebd8..134d53096499c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -786,7 +786,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ @@ -1291,17 +1291,13 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # By default PIP has progress bar but you can disable it. ARG PIP_PROGRESS_BAR # By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up -# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds. +# builds in case pyproject.toml changed. This is pure optimisation of CI/Breeze builds. ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false" # This is airflow version that is put in the label of the image build ARG AIRFLOW_VERSION # By default latest released version of airflow is installed (when empty) but this value can be overridden # and we can install version according to specification (For example ==2.0.2 or <3.0.0). ARG AIRFLOW_VERSION_SPECIFICATION -# By default we install providers from PyPI but in case of Breeze build we want to install providers -# from local sources without the need of preparing provider packages upfront. This value is -# automatically overridden by Breeze scripts. -ARG INSTALL_PROVIDERS_FROM_SOURCES="false" # Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package # But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag # Of Airflow. 
Note That for local source installation you need to have local sources of @@ -1330,7 +1326,6 @@ ARG ADDITIONAL_PIP_INSTALL_FLAGS="" ENV AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD} \ AIRFLOW_VERSION_SPECIFICATION=${AIRFLOW_VERSION_SPECIFICATION} \ @@ -1375,8 +1370,7 @@ ARG USE_CONSTRAINTS_FOR_CONTEXT_PACKAGES="false" # In case of Production build image segment we want to pre-install main version of airflow # dependencies from GitHub so that we do not have to always reinstall it from the scratch. -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain +# The Airflow and providers are uninstalled, only dependencies remain # the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) and in case # INSTALL_PACKAGES_FROM_CONTEXT is not set (because then caching it from main makes no sense). diff --git a/Dockerfile.ci b/Dockerfile.ci index e6f9964d84f81..4a086d6c0541d 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -624,7 +624,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ @@ -900,7 +900,7 @@ function check_download_sqlalchemy() { if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then return fi - min_sqlalchemy_version=$(grep "sqlalchemy>=" setup.cfg | sed "s/.*>=\([0-9\.]*\).*/\1/") + min_sqlalchemy_version=$(grep "sqlalchemy>=" pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/") echo echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${min_sqlalchemy_version}${COLOR_RESET}" echo @@ -1058,8 +1058,6 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" # It can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. ARG AIRFLOW_CI_BUILD_EPOCH="6" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" -# By default in the image, we are installing all providers when installing from sources -ARG INSTALL_PROVIDERS_FROM_SOURCES="true" ARG AIRFLOW_PIP_VERSION=23.3.2 # Setup PIP # By default PIP install run without cache to make image smaller @@ -1089,7 +1087,6 @@ ENV AIRFLOW_REPO=${AIRFLOW_REPO}\ DEFAULT_CONSTRAINTS_BRANCH=${DEFAULT_CONSTRAINTS_BRANCH} \ AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} \ AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES} \ - INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES} \ AIRFLOW_VERSION=${AIRFLOW_VERSION} \ AIRFLOW_PIP_VERSION=${AIRFLOW_PIP_VERSION} \ # In the CI image we always: @@ -1142,8 +1139,7 @@ COPY --from=scripts install_pip_version.sh install_airflow_dependencies_from_bra # In case of CI builds we want to pre-install main version of airflow dependencies so that # We do not have to always reinstall it from the scratch. 
# And is automatically reinstalled from the scratch every time patch release of python gets released -# The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false") -# are uninstalled, only dependencies remain. +# The Airflow and providers are uninstalled, only dependencies remain. # the cache is only used when "upgrade to newer dependencies" is not set to automatically # account for removed dependencies (we do not install them in the first place) RUN bash /scripts/docker/install_pip_version.sh; \ @@ -1162,21 +1158,21 @@ COPY --from=scripts install_pipx_tools.sh /scripts/docker/ RUN bash /scripts/docker/install_pipx_tools.sh # Airflow sources change frequently but dependency configuration won't change that often -# We copy setup.py and other files needed to perform setup of dependencies -# So in case setup.py changes we can install latest dependencies required. -COPY setup.py ${AIRFLOW_SOURCES}/setup.py -COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg +# We copy pyproject.toml and other files needed to perform setup of dependencies +# So in case pyproject.toml changes we can install latest dependencies required. +COPY pyproject.toml ${AIRFLOW_SOURCES}/pyproject.toml COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ -COPY generated/provider_dependencies.json ${AIRFLOW_SOURCES}/generated/ +COPY generated/* ${AIRFLOW_SOURCES}/generated/ COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ - +COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE +COPY hatch_build.py ${AIRFLOW_SOURCES} COPY --from=scripts install_airflow.sh /scripts/docker/ -# The goal of this line is to install the dependencies from the most current setup.py from sources +# The goal of this line is to install the dependencies from the most current pyproject.toml from sources # This will be usually incremental small set of packages in CI optimized build, so it will be very fast # In non-CI optimized build this will install all dependencies before installing sources. -# Usually we will install versions based on the dependencies in setup.py and upgraded only if needed. -# But in cron job we will install latest versions matching setup.py to see if there is no breaking change +# Usually we will install versions based on the dependencies in pyproject.toml and upgraded only if needed. +# But in cron job we will install latest versions matching pyproject.toml to see if there is no breaking change # and push the constraints if everything is successful RUN bash /scripts/docker/install_airflow.sh diff --git a/IMAGES.rst b/IMAGES.rst index 9cb6d94bb3750..809d33a3855e3 100644 --- a/IMAGES.rst +++ b/IMAGES.rst @@ -54,7 +54,7 @@ CI image The CI image is used by `Breeze `_ as the shell image but it is also used during CI tests. The image is single segment image that contains Airflow installation with "all" dependencies installed. It is optimised for rebuild speed. It installs PIP dependencies from the current branch first - -so that any changes in ``setup.py`` do not trigger reinstalling of all dependencies. +so that any changes in ``pyproject.toml`` do not trigger reinstalling of all dependencies. There is a second step of installation that re-installs the dependencies from the latest sources so that we are sure that latest dependencies are installed. diff --git a/INSTALL b/INSTALL index 4b6da57ae840f..6686aa21e78df 100644 --- a/INSTALL +++ b/INSTALL @@ -1,118 +1,324 @@ # INSTALL / BUILD instructions for Apache Airflow -This is a generic installation method that requires a number of dependencies to be installed. 
+## Basic installation of Airflow from sources and development environment setup + +This is a generic installation method that requires a minimal set of standard tools to develop airflow and +test it in a local virtual environment (using a standard CPython installation and `pip`). Depending on your system you might need different prerequisites, but the following systems/prerequisites are known to work: -Linux (Debian Bullseye, Bookworm and Linux Mint Debbie): +Linux (Debian Bookworm): -sudo apt install build-essential python3-dev libsqlite3-dev openssl \ - sqlite default-libmysqlclient-dev libmysqlclient-dev postgresql + sudo apt install -y --no-install-recommends apt-transport-https apt-utils ca-certificates \ + curl dumb-init freetds-bin gosu krb5-user libgeos-dev \ + ldap-utils libsasl2-2 libsasl2-modules libxmlsec1 locales libffi8 libldap-2.5-0 libssl3 netcat-openbsd \ + lsb-release openssh-client python3-selinux rsync sasl2-bin sqlite3 sudo unixodbc -On Ubuntu 20.04 you may get an error of mariadb_config not found -and mysql_config not found. +You might need to install MariaDB development headers to build some of the dependencies: -Install MariaDB development headers: -sudo apt-get install libmariadb-dev libmariadbclient-dev + sudo apt-get install libmariadb-dev libmariadbclient-dev -MacOS (Mojave/Catalina): +On MacOS (Mojave/Catalina) you might need to install XCode command line tools, brew, and those packages: -brew install sqlite mysql postgresql + brew install sqlite mysql postgresql -# [required] fetch the tarball and untar the source move into the directory that was untarred. +## Downloading and installing Airflow from sources -# [optional] run Apache RAT (release audit tool) to validate license headers -# RAT docs here: https://creadur.apache.org/rat/. Requires Java and Apache Rat -java -jar apache-rat.jar -E ./.rat-excludes -d . +While you can get Airflow sources in various ways (including cloning https://github.com/apache/airflow/), the +canonical way to download it is to fetch the tarball published at https://downloads.apache.org where you can +also verify checksums and signatures of the downloaded file. You can then un-tar the source and move into the +directory that was un-tarred. -# [optional] Airflow pulls in quite a lot of dependencies in order -# to connect to other services. You might want to test or run Airflow -# from a virtual env to make sure those dependencies are separated -# from your system wide versions +When you download source packages from https://downloads.apache.org, you download sources of Airflow and +all providers separately. However, when you clone the GitHub repository at https://github.com/apache/airflow/ +you get all sources in one place. This is the most convenient way to develop Airflow and Providers together. +Otherwise you have to separately install Airflow and Providers from sources in the same environment, which +is not as convenient. -python3 -m venv PATH_TO_YOUR_VENV -source PATH_TO_YOUR_VENV/bin/activate +## Creating virtualenv -# [required] building and installing by pip (preferred) -pip install . +Airflow pulls in quite a lot of dependencies in order to connect to other services. You generally want to +test or run Airflow from a virtual env to make sure those dependencies are separated from your system +wide versions. Using the system-installed Python is strongly discouraged, as the versions of Python +shipped with the operating system often have a number of limitations and are not up to date.
It is recommended +to install Python using either https://www.python.org/downloads/ or other tools that manage Python installations. See below +for a description of `Hatch`, which is Airflow's tool of choice for building Airflow packages. -# or directly -python setup.py install +Once you have a suitable Python version installed, you can create a virtualenv and activate it: -# You can also install recommended version of the dependencies by using -# constraint-python.txt files as constraint file. This is needed in case -# you have problems with installing the current requirements from PyPI. -# There are different constraint files for different python versions. For example" + python3 -m venv PATH_TO_YOUR_VENV + source PATH_TO_YOUR_VENV/bin/activate -pip install . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" +## Installing airflow locally +Installing airflow locally can be done using pip - note that this will install a "development" version of +Airflow, where all providers are installed from local sources (if they are available), not from `pypi`. +It will also not include pre-installed providers installed from PyPI. In case you install from sources of +just Airflow, you need to separately install each provider that you want to develop. In case you install +from the GitHub repository, all the current providers are available after installing Airflow. -By default `pip install` in Airflow 2.0 installs only the provider packages that are needed by the extras and -install them as packages from PyPI rather than from local sources: + pip install . -pip install .[google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" +If you develop Airflow and iterate on it you should install it in editable mode (with the -e flag) and then +you do not need to re-install it after each change to sources. This is useful if you want to develop and +iterate on Airflow and Providers (together) when you install sources from a cloned GitHub repository. + pip install -e . -You can upgrade just airflow, without paying attention to provider's dependencies by using 'constraints-no-providers' -constraint files. This allows you to keep installed provider packages. -pip install . --upgrade \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" +You can also install optional packages that are needed to run certain tests. In case of local installation, +for example, you can install all prerequisites for the google provider, tests and +all hadoop providers with this command: + + pip install -e ".[editable_google,devel_tests,devel_hadoop]" + + +or you can install all packages needed to run tests for core airflow: + + pip install -e ".[devel]" + +or you can install all packages needed to run tests for core, providers and all extensions of airflow: + + pip install -e ".[devel_all]" + +You can see the list of all available extras below. + +# Using Hatch to manage your Python, virtualenvs and build packages + +Airflow uses [hatch](https://hatch.pypa.io/) as a build and development tool of choice. It is one of the popular +build tools and environment managers for Python, maintained by the Python Packaging Authority. +It is an optional tool that is only really needed when you want to build packages from sources, but +it is also very convenient to manage your Python versions and virtualenvs.
+ +The Airflow project contains some pre-defined virtualenv definitions in ``pyproject.toml`` that can be +easily used by hatch to create your local venvs. This is not necessary for you to develop and test +Airflow, but it is a convenient way to manage your local Python versions and virtualenvs. + +## Installing Hatch + +You can install hatch in various ways (including GUI installers). + +Example using `pipx`: + + pipx install hatch + +We recommend using `pipx` as you can easily manage installed Python apps and later use it +to upgrade `hatch` as needed with: + + pipx upgrade hatch + +## Using Hatch to manage your Python versions + +You can also use hatch to install and manage airflow virtualenvs and development +environments. For example, you can install Python 3.10 with this command: + + hatch python install 3.10 + +or install all Python versions that are used in Airflow: + + hatch python install all + +## Using Hatch to manage your virtualenvs + +Airflow has some pre-defined virtualenvs that you can use to develop and test airflow. +You can see the list of available envs with: + + hatch env show + +This is what it shows currently: + +┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +┃ Name ┃ Type ┃ Dependencies ┃ Description ┃ +┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +│ default │ virtual │ apache-airflow[devel] │ Default environment with Python 3.8 for maximum compatibility │ +├─────────────┼─────────┼───────────────────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-38 │ virtual │ apache-airflow[devel] │ Environment with Python 3.8 │ +├─────────────┼─────────┼───────────────────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-39 │ virtual │ apache-airflow[devel] │ Environment with Python 3.9 │ +├─────────────┼─────────┼───────────────────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-310 │ virtual │ apache-airflow[devel] │ Environment with Python 3.10 │ +├─────────────┼─────────┼───────────────────────┼───────────────────────────────────────────────────────────────┤ +│ airflow-311 │ virtual │ apache-airflow[devel] │ Environment with Python 3.11 │ +└─────────────┴─────────┴───────────────────────┴───────────────────────────────────────────────────────────────┘ + +The default env (if you have not used one explicitly) is `default` and it is a Python 3.8 +virtualenv for maximum compatibility. The default extra with which the environment is created is "devel", which +should be enough to develop and run basic airflow core tests.
You can create the default environment with: + + hatch env create + +You can create a specific environment by passing its name to the create command: + + hatch env create airflow-310 + +You can install extras in the environment by running a pip command: + + hatch -e airflow-310 run -- pip install ".[editable_google]" + +And you can enter the environment by running a shell of your choice (for example zsh) where you +can run any commands: + + hatch -e airflow-310 shell + +You can also see where hatch created the virtualenvs and use it in your IDE or activate it manually: + + hatch env find airflow-310 + +You will get a path similar to: + + /Users/jarek/Library/Application Support/hatch/env/virtual/apache-airflow/TReRdyYt/apache-airflow + +Then you will find the `python` binary and `activate` script in the `bin` sub-folder of this directory and +you can configure your IDE to use this python virtualenv if you want to use that environment in your IDE. + +You can also set the default environment via the HATCH_ENV environment variable. +You can clean the env by running: -You can also install airflow in "editable mode" (with -e) flag and then provider packages are -available directly from the sources (and the provider packages installed from PyPI are UNINSTALLED in -order to avoid having providers in two places. And `provider.yaml` files are used to discover capabilities -of the providers which are part of the airflow source code. + hatch env prune -You can read more about `provider.yaml` and community-managed providers in -https://airflow.apache.org/docs/apache-airflow-providers/index.html for developing custom providers -and in ``CONTRIBUTING.rst`` for developing community maintained providers. +More information about hatch can be found in https://hatch.pypa.io/1.9/environment/ -This is useful if you want to develop providers: +## Using Hatch to build your packages + +You can use hatch to build an installable package from the airflow sources. Such a package will +include all metadata that is configured in `pyproject.toml` and will be installable with pip. + +The packages will have pre-installed dependencies for providers that are always +installed when Airflow is installed from PyPI. By default, both `wheel` and `sdist` packages are built. + + hatch build + +You can also build only `wheel` or `sdist` packages: + + hatch build -t wheel + hatch build -t sdist + +## Installing recommended version of dependencies + +Whatever virtualenv solution you use, when you want to make sure you are using the same +version of dependencies as in main, you can install the recommended versions of the dependencies by using +constraint-python.txt files as a `constraint` file. This might be useful +to avoid "works-for-me" syndrome, where you use a different version of dependencies than the ones +that are used in main, in CI tests, and by other contributors. + +There are different constraint files for different python versions. For example, this command will install +all basic devel requirements and the requirements of the google provider as last successfully tested for Python 3.8: + + pip install -e ".[devel,editable_google]" \ + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" + +You can upgrade just airflow, without paying attention to provider's dependencies by using +the 'constraints-no-providers' constraint files. This allows you to keep the installed provider dependencies +and upgrade just the core airflow dependencies to the latest supported versions. pip install -e . 
\ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" - -You can also skip installing provider packages from PyPI by setting INSTALL_PROVIDERS_FROM_SOURCE to "true". -In this case Airflow will be installed in non-editable mode with all providers installed from the sources. -Additionally `provider.yaml` files will also be copied to providers folders which will make the providers -discoverable by Airflow even if they are not installed from packages in this case. - -INSTALL_PROVIDERS_FROM_SOURCES="true" pip install . \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" - -Airflow can be installed with extras to install some additional features (for example 'async' or 'doc' or -to install automatically providers and all dependencies needed by that provider: - -pip install .[async,google,amazon] \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" - -The list of available extras: - -# START EXTRAS HERE -aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, -apache.drill, apache.druid, apache.flink, apache.hdfs, apache.hive, apache.impala, apache.kafka, -apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.webhdfs, apprise, -arangodb, asana, async, atlas, atlassian.jira, aws, azure, cassandra, celery, cgroups, cloudant, -cncf.kubernetes, cohere, common.io, common.sql, crypto, databricks, datadog, dbt.cloud, -deprecated_api, devel, devel_all, devel_ci, devel_hadoop, dingding, discord, doc, doc_gen, docker, -druid, elasticsearch, exasol, fab, facebook, ftp, gcp, gcp_api, github, github_enterprise, google, -google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, -ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, -mysql, neo4j, odbc, openai, openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, -pandas, papermill, password, pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, -salesforce, samba, saml, segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, -spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, -webhdfs, winrm, yandex, zendesk -# END EXTRAS HERE - -# For installing Airflow in development environments - see CONTRIBUTING.rst - -# COMPILING FRONT-END ASSETS (in case you see "Please make sure to build the frontend in static/ directory and then restart the server") -# Optional : Installing yarn - https://classic.yarnpkg.com/en/docs/install - -python setup.py compile_assets + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" + +## All airflow extras + +Airflow has a number of extras that you can install to get additional dependencies. They sometimes install +providers, sometimes enable other features where packages are not installed by default. + +You can read more about those extras in the extras reference: +https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html + +The list of available extras is below. 
+ +Devel extras - used to install development-related tools + +# START DEVEL EXTRAS HERE +devel, devel_all, devel_all_dbs, devel_amazon, devel_azure, devel_breeze, devel_ci, devel_debuggers, +devel_deltalake, devel_devscripts, devel_duckdb, devel_hadoop, devel_iceberg, devel_mongo, +devel_mypy, devel_sentry, devel_static_checks, devel_tests +# END DEVEL EXTRAS HERE + +Doc extras - used to install dependencies that are needed to build documentation. + +# START DOC EXTRAS HERE +doc, doc_gen +# END DOC EXTRAS HERE + + +Editable extras - used to install editable variants of production extras - which install +dependencies that do not install providers. + +# START EDITABLE EXTRAS HERE +editable_airbyte, editable_alibaba, editable_amazon, editable_apache_beam, +editable_apache_cassandra, editable_apache_drill, editable_apache_druid, editable_apache_flink, +editable_apache_hdfs, editable_apache_hive, editable_apache_impala, editable_apache_kafka, +editable_apache_kylin, editable_apache_livy, editable_apache_pig, editable_apache_pinot, +editable_apache_spark, editable_apprise, editable_arangodb, editable_asana, editable_atlassian_jira, +editable_celery, editable_cloudant, editable_cncf_kubernetes, editable_cohere, editable_common_io, +editable_common_sql, editable_databricks, editable_datadog, editable_dbt_cloud, editable_dingding, +editable_discord, editable_docker, editable_elasticsearch, editable_exasol, editable_fab, +editable_facebook, editable_ftp, editable_github, editable_google, editable_grpc, +editable_hashicorp, editable_http, editable_imap, editable_influxdb, editable_jdbc, +editable_jenkins, editable_microsoft_azure, editable_microsoft_mssql, editable_microsoft_psrp, +editable_microsoft_winrm, editable_mongo, editable_mysql, editable_neo4j, editable_odbc, +editable_openai, editable_openfaas, editable_openlineage, editable_opensearch, editable_opsgenie, +editable_oracle, editable_pagerduty, editable_papermill, editable_pgvector, editable_pinecone, +editable_postgres, editable_presto, editable_redis, editable_salesforce, editable_samba, +editable_segment, editable_sendgrid, editable_sftp, editable_singularity, editable_slack, +editable_smtp, editable_snowflake, editable_sqlite, editable_ssh, editable_tableau, +editable_tabular, editable_telegram, editable_trino, editable_vertica, editable_weaviate, +editable_yandex, editable_zendesk +# END EDITABLE EXTRAS HERE + +Standard extras - those are regular extras that are available for users in the standard packages +of Airflow. 
+ +# START STANDARD EXTRAS HERE
+aiobotocore, airbyte, alibaba, all, all_dbs, amazon, apache_atlas, apache_beam, apache_cassandra,
+apache_drill, apache_druid, apache_flink, apache_hdfs, apache_hive, apache_impala, apache_kafka,
+apache_kylin, apache_livy, apache_pig, apache_pinot, apache_spark, apache_webhdfs, apprise,
+arangodb, asana, async, atlas, atlassian_jira, aws, azure, cassandra, celery, cgroups, cloudant,
+cncf_kubernetes, cohere, common_io, common_sql, crypto, databricks, datadog, dbt_cloud,
+deprecated_api, dingding, discord, docker, druid, elasticsearch, exasol, fab, facebook, ftp, gcp,
+gcp_api, github, github_enterprise, google, google_auth, graphviz, grpc, hashicorp, hdfs, hive,
+http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft_azure,
+microsoft_mssql, microsoft_psrp, microsoft_winrm, mongo, mssql, mysql, neo4j, odbc, openai,
+openfaas, openlineage, opensearch, opsgenie, oracle, otel, pagerduty, pandas, papermill, password,
+pgvector, pinecone, pinot, postgres, presto, rabbitmq, redis, s3, s3fs, salesforce, samba, saml,
+segment, sendgrid, sentry, sftp, singularity, slack, smtp, snowflake, spark, sqlite, ssh, statsd,
+tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk
+# END STANDARD EXTRAS HERE + + + +## Compiling front end assets +
+Sometimes you may see that front-end assets are missing and the website looks broken. This is because
+you need to compile the front-end assets. This is done automatically when you create a virtualenv
+with hatch, but if you want to do it manually, you can do it after installing node and yarn by running: + + yarn install --frozen-lockfile + yarn run build +
+Currently we are running the yarn version bundled with node 18.6.0, but you should check the version in
+our `.pre-commit-config.yaml` file (node version). + +Installing yarn is described in https://classic.yarnpkg.com/en/docs/install +
+Also - in case you use `breeze` or have `pre-commit` installed, you can build the assets with: + + pre-commit run --hook-stage manual compile-www-assets --all-files + +or + + breeze compile-www-assets +
+Both commands will install node and yarn if needed to a dedicated pre-commit node environment and +then build the assets. +
+Finally, you can also clean and recompile assets with the ``custom`` build target when running hatch build: + + hatch build -t custom -t wheel -t sdist +
+This will also update the `git_version` file in the airflow package that should contain the git commit hash of the +build. This is used to display the commit hash in the UI. + +
+# TODO(potiuk) update MIN_BOTO3_VERSION +_MIN_BOTO3_VERSION = "1.28.0"
diff --git a/LOCAL_VIRTUALENV.rst b/LOCAL_VIRTUALENV.rst index 4dc9c11b5ed86..e5f4ecd0463d6 100644 --- a/LOCAL_VIRTUALENV.rst +++ b/LOCAL_VIRTUALENV.rst @@ -108,7 +108,9 @@ You are STRONGLY encouraged to also install and use `pre-commit hooks `_.
+The full list of extras is available in ``_ and can be easily retrieved using hatch via + + Creating a Local virtualenv =========================== @@ -169,38 +171,31 @@ for different python versions). For development on current main source: ..
code-block:: bash # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11
- pip install -e ".[devel,]" \ + pip install -e ".[devel,devel_tests,editable_amazon,...]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt"
This will install Airflow in 'editable' mode - where sources of Airflow are taken directly from the source
-code rather than moved to the installation directory. During the installation airflow will install - but then -automatically remove all provider packages installed from PyPI - instead it will automatically use the -provider packages available in your local sources.
+code rather than moved to the installation directory. + +TODO(potiuk): Clarify preinstalled
You can also install Airflow in non-editable mode: .. code-block:: bash # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11
- pip install ".[devel,]" \ + pip install ".[devel,devel_tests,editable_amazon,...]" \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt"
This will copy the sources to directory where usually python packages are installed. You can see the list
-of directories via ``python -m site`` command. In this case the providers are installed from PyPI, not from -sources, unless you set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment variable to ``true`` - - .. code-block:: bash - - # use the same version of python as you are working with, 3.8, 3.9, 3.10 or 3.11 - INSTALL_PROVIDERS_FROM_SOURCES="true" pip install ".[devel,]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" -
+of directories via the ``python -m site`` command.
Note: when you first initialize database (the next step), you may encounter some problems.
-This is because airflow by default will try to load in example dags where some of them requires dependencies ``google`` and ``postgres``. -You can solve the problem by: -- installing the extras i.e. ``[devel,google,postgres]`` or
+This is because airflow by default will try to load example dags, some of which require the +dependencies ``google`` and ``postgres``. You can solve the problem by: +
+- installing the extras i.e. ``[devel,devel_google,devel_postgres]`` or - disable the example dags with environment variable: ``export AIRFLOW__CORE__LOAD_EXAMPLES=False`` or - simply ignore the error messages and proceed
@@ -240,19 +235,7 @@ In Airflow 2.0 we introduced split of Apache Airflow into separate packages - th apache-airflow package with core of Airflow and 70+ packages for all providers (external services and software Airflow can communicate with).
-Developing providers is part of Airflow development, but when you install airflow as editable in your local -development environment, the corresponding provider packages will be also installed from PyPI. However, the -providers will also be present in your "airflow/providers" folder. This might lead to confusion, -which sources of providers are imported during development. It will depend on your -environment's PYTHONPATH setting in general. -
-In order to avoid the confusion, you can set ``INSTALL_PROVIDERS_FROM_SOURCES`` environment to ``true`` -before running ``pip install`` command: - -..
code-block:: bash - - INSTALL_PROVIDERS_FROM_SOURCES="true" pip install -U -e ".[devel,]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" +TODO(potiuk): Clarify This way no providers packages will be installed and they will always be imported from the "airflow/providers" folder. diff --git a/README.md b/README.md index 984dd755e54ef..3a7cd5afc021a 100644 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ Documentation for dependent projects like provider packages, Docker image, Helm We publish Apache Airflow as `apache-airflow` package in PyPI. Installing it however might be sometimes tricky because Airflow is a bit of both a library and application. Libraries usually keep their dependencies open, and applications usually pin them, but we should do neither and both simultaneously. We decided to keep -our dependencies as open as possible (in `setup.py`) so users can install different versions of libraries +our dependencies as open as possible (in `pyproject.toml`) so users can install different versions of libraries if needed. This means that `pip install apache-airflow` will not work from time to time or will produce unusable Airflow installation. @@ -377,7 +377,7 @@ binding. ### Approach for dependencies for Airflow Core -Those `extras` and `providers` dependencies are maintained in `setup.cfg`. +Those dependencies are maintained in ``pyproject.toml``. There are few dependencies that we decided are important enough to upper-bound them by default, as they are known to follow predictable versioning scheme, and we know that new versions of those are very likely to diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 42aeb7d2327a8..6f2c98367f15a 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -232,12 +232,12 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-pydevd-left-in-code | Check for pydevd debug statements accidentally left | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ +| check-pyproject-toml-order | Check order of dependencies in pyproject.toml | | ++-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-revision-heads-map | Check that the REVISION_HEADS_MAP is up-to-date | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-safe-filter-usage-in-html | Don't use safe in templates | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| check-setup-order | Check order of dependencies in setup.cfg and setup.py | | -+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-sql-dependency-common-data-structure | Check dependency of SQL Providers with common data structure | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | check-start-date-not-used-in-defaults | start_date not to be defined in default_args in example_dags | | @@ -362,7 +362,7 @@ require Breeze Docker image to be built locally. 
+-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-migration-references | Update migration ref doc | * | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ -| update-providers-dependencies | Update cross-dependencies for providers packages | | +| update-providers-dependencies | Update dependencies for provider packages | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ | update-spelling-wordlist-to-be-sorted | Sort alphabetically and uniquify spelling_wordlist.txt | | +-----------------------------------------------------------+--------------------------------------------------------------+---------+ diff --git a/airflow/_vendor/README.md b/airflow/_vendor/README.md index e76d8beea360c..9d26a0257f753 100644 --- a/airflow/_vendor/README.md +++ b/airflow/_vendor/README.md @@ -30,7 +30,7 @@ Way to vendor a library or update a version: 3. Replace them with new files (only replace relevant python packages:move LICENSE ) * move license files to [licenses](../../licenses) folder * remove README and any other supporting files (they can be found in PyPI) - * make sure to add requirements from setup.py to airflow's setup.py with appropriate comment stating + * make sure to add requirements to airflow's ``pyproject.toml`` with appropriate comment stating why the requirements are added and when they should be removed 4. If you replace previous version, re-apply historical fixes from the "package" folder by cherry-picking them. diff --git a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst index 41338f1f7482f..1a8f1cb848d3d 100644 --- a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst +++ b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst @@ -195,11 +195,16 @@ Documentation An important part of building a new provider is the documentation. Some steps for documentation occurs automatically by ``pre-commit`` see `Installing pre-commit guide `_ +Those are important files in the airflow source tree that affect providers. The ``pyproject.toml`` in root +Airflow folder is automatically generated based on content of ``provider.yaml`` file in each provider +when ``pre-commit`` is run. Files such as ``extra-packages-ref.rst`` should be manually updated because +they are manually formatted for better layout and ``pre-commit`` will just verify if the information +about provider is updated there. Files like ``commit.rst`` and ``CHANGELOG`` are automatically updated +by ``breeze release-management`` command by release manager when providers are released. + .. 
code-block:: bash - ├── INSTALL - ├── CONTRIBUTING.rst - ├── setup.py + ├── pyproject.toml ├── airflow/ │ └── providers/ │ └── / @@ -207,7 +212,6 @@ Some steps for documentation occurs automatically by ``pre-commit`` see `Install │ └── CHANGELOG.rst │ └── docs/ - ├── spelling_wordlist.txt ├── apache-airflow/ │ └── extra-packages-ref.rst ├── integration-logos// @@ -220,36 +224,8 @@ Some steps for documentation occurs automatically by ``pre-commit`` see `Install └── .rst -Files automatically updated by pre-commit: - -- ``INSTALL`` in provider - -Files automatically created when the provider is released: - -- ``docs/apache-airflow-providers-/commits.rst`` -- ``/airflow/providers//CHANGELOG`` - There is a chance that your provider's name is not a common English word. -In this case is necessary to add it to the file ``docs/spelling_wordlist.txt``. This file begin with capitalized words and -lowercase in the second block. - - .. code-block:: bash - - Namespace - Neo4j - Nextdoor - (new line) - Nones - NotFound - Nullable - ... - neo4j - neq - networkUri - (new line) - nginx - nobr - nodash +In this case is necessary to add it to the file ``docs/spelling_wordlist.txt``. Add your provider dependencies into ``provider.yaml`` under ``dependencies`` key.. If your provider doesn't have any dependency add a empty list. @@ -258,9 +234,9 @@ In the ``docs/apache-airflow-providers-/connections.rst``: - add information how to configure connection for your provider. -In the ``docs/apache-airflow-providers-/operators/.rst``: - -- add information how to use the Operator. It's important to add examples and additional information if your Operator has extra-parameters. +In the ``docs/apache-airflow-providers-/operators/.rst`` add information +how to use the Operator. It's important to add examples and additional information if your +Operator has extra-parameters. .. code-block:: RST @@ -284,7 +260,7 @@ In the ``docs/apache-airflow-providers-/operators/.r :end-before: [END howto_operator_] -Copy from another, similar provider the docs: ``docs/apache-airflow-providers-new_provider/*.rst``: +Copy from another, similar provider the docs: ``docs/apache-airflow-providers-/*.rst``: At least those docs should be present @@ -336,20 +312,6 @@ In the ``airflow/providers//provider.yaml`` add information of you - hook-class-name: airflow.providers..hooks..NewProviderHook - connection-type: provider-connection-type - hook-class-names: # deprecated in Airflow 2.2.0 - - airflow.providers..hooks..NewProviderHook - -.. note:: Defining your own connection types - - You only need to add ``connection-types`` in case you have some hooks that have customized UI behavior. However, - it is only supported for Airflow 2.2.0. If your providers are also targeting Airflow below 2.2.0 you should - provide the deprecated ``hook-class-names`` array. The ``connection-types`` array allows for optimization - of importing of individual connections and while Airflow 2.2.0 is able to handle both definition, the - ``connection-types`` is recommended. - - For more information see `Custom connection types `_ - - After changing and creating these files you can build the documentation locally. The two commands below will serve to accomplish this. The first will build your provider's documentation. The second will ensure that the main Airflow documentation that involves some steps with the providers is also working. @@ -488,14 +450,13 @@ to do. 
* You will have to run ``breeze setup regenerate-command-images`` to regenerate breeze help files * you will need to update ``extra-packages-ref.rst`` and in some cases - when mentioned there explicitly -
- ``setup.py`` to remove the provider from list of dependencies. + ``pyproject.toml`` to remove the provider from the list of dependencies.
-What happens under-the-hood as a result, is that ``generated/providers.json`` file is updated with +What happens under-the-hood as a result, is that the ``pyproject.toml`` file is updated with
the information about available providers and their dependencies and it is used by our tooling to exclude suspended providers from all relevant parts of the build and CI system (such as building CI image with dependencies, building documentation, running tests, etc.) -
Resuming providers ================== @@ -503,7 +464,6 @@ Resuming providers is done by reverting the original change that suspended it. I needed to fix problems in the reverted provider, our CI will detect them and you will have to fix them as part of the PR reverting the suspension. -
Removing providers ==================
diff --git a/airflow/providers/amazon/provider.yaml b/airflow/providers/amazon/provider.yaml index ed8a288424d3a..8ccd2511d623c 100644 --- a/airflow/providers/amazon/provider.yaml +++ b/airflow/providers/amazon/provider.yaml @@ -86,7 +86,8 @@ dependencies: # of this file). Currently we set aiobotocore as minimum 2.5.3 - as this is was the first version # that supported boto3 1.28. # NOTE!!! BEFORE botocore 1.33 BOTOCORE VERSIONS ARE SHIFTED BY 3 MINOR VERSIONS
- # NOTE!!! Make sure to update _MIN_BOTO3_VERSION in setup.py when you update it here + # TODO(potiuk) update it to pyproject.toml generation + # NOTE!!! Make sure to update _MIN_BOTO3_VERSION in pyproject.toml when you update it here
- boto3>=1.28.0 # NOTE!!! BEFORE botocore 1.33 version is always shifted by 3 MINOR VERSIONS from boto3, see: # - https://github.com/boto/boto3/issues/2702
diff --git a/airflow/providers/fab/provider.yaml b/airflow/providers/fab/provider.yaml index 80e834fcadd65..5f5b2860f854e 100644 --- a/airflow/providers/fab/provider.yaml +++ b/airflow/providers/fab/provider.yaml @@ -36,6 +36,11 @@ versions: dependencies: - apache-airflow>=2.9.0 - flask>=2.2,<2.3
+ # We are tightly coupled with the FAB version as we vendored-in part of the FAB code related to the security manager.
+ # This is done as part of preparation for removing FAB as a dependency, but we are not ready for it yet.
+ # Every time we update the FAB version here, please make sure that you review the classes and models in
+ # `airflow/providers/fab/auth_manager/security_manager/override.py` against their upstream counterparts.
+ # In particular, make sure any breaking changes, for example any new methods, are accounted for.
- flask-appbuilder==4.3.10 - flask-login>=0.6.2 - google-re2>=1.0
diff --git a/airflow/utils/dot_renderer.py b/airflow/utils/dot_renderer.py index 41281fbbb1610..4d44d1e2ecf14 100644 --- a/airflow/utils/dot_renderer.py +++ b/airflow/utils/dot_renderer.py @@ -19,9 +19,14 @@ """Renderer DAG (tasks and dependencies) to the graphviz object.""" from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any -import graphviz +try: + import graphviz +except ImportError: + warnings.warn("Could not import graphviz.
Rendering graph to the graphical format will not be possible.") + graphviz = None from airflow.exceptions import AirflowException from airflow.models.baseoperator import BaseOperator @@ -151,6 +156,10 @@ def render_dag_dependencies(deps: dict[str, list[DagDependency]]) -> graphviz.Di :param deps: List of DAG dependencies :return: Graphviz object """ + if not graphviz: + raise AirflowException( + "Could not import graphviz. Install the graphviz python package to fix this error." + ) dot = graphviz.Digraph(graph_attr={"rankdir": "LR"}) for dag, dependencies in deps.items(): @@ -179,6 +188,10 @@ def render_dag(dag: DAG, tis: list[TaskInstance] | None = None) -> graphviz.Digr :param tis: List of task instances :return: Graphviz object """ + if not graphviz: + raise AirflowException( + "Could not import graphviz. Install the graphviz python package to fix this error." + ) dot = graphviz.Digraph( dag.dag_id, graph_attr={ diff --git a/clients/gen/common.sh b/clients/gen/common.sh index cd78d6d039948..0437028351589 100755 --- a/clients/gen/common.sh +++ b/clients/gen/common.sh @@ -59,8 +59,7 @@ function validate_input { git_push.sh .gitlab-ci.yml requirements.txt -setup.cfg -setup.py +pyproject.toml test-requirements.txt tox.ini EOF diff --git a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md index 29b25f5f1175f..e41d62969dd9f 100644 --- a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md +++ b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md @@ -424,7 +424,7 @@ The slight risk is that if there is a constraint problem that impacts regular PR make all PRs "red" until the constraint is fixed. However, if this is the case then usually we should fix the problem by fixing the tests or dependencies and the automated CI process should be able to self-heal. The main build does not use constraints and it will attempt to upgrade (or downgrade) the dependencies to -the latest version matching the dependency specification we have in setup.cfg/setup.py/provider.yaml files. +the latest version matching the dependency specification we have in pyproject.toml files. Also the constraints are pushed without `--force` so there is no risk of destroying anything. The history is kept in Git, so you can always revert to the previous version if needed. diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 5b1ad59a40a9c..f658a5a38530c 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -76,7 +76,7 @@ For obvious reasons, you can't cherry-pick every change from `main` into the rel some are incompatible without a large set of other changes, some are brand-new features, and some just don't need to be in a release. In general only security fixes, data-loss bugs and regression fixes are essential to bring into a patch release; -also changes in dependencies (setup.py, setup.cfg) resulting from releasing newer versions of packages that Airflow depends on. +also changes in dependencies (pyproject.toml) resulting from releasing newer versions of packages that Airflow depends on. 
Other bugfixes can be added on a best-effort basis, but if something is going to be very difficult to backport (maybe it has a lot of conflicts, or heavily depends on a new feature or API that's not being backported), it's OK to leave it out of the release at your sole discretion as the release manager - diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 9db81c1fd45d7..f638fd43bd2d5 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: c7d80ab49c6dc4bf2b54957663b0126ab9c8f48df28a34c0eb56340540cb1f52d063ef99ee5f9cacbd375b1a711278884f9ef9aab41e620fa70fffd81f7ece3c +Package config hash: 696421ec548aafeb9147af89940ca49f9603e3de43c157bb5feff76579d2ce502abd2651a0b48bde14aa7134d61c751267a25b7761150596fbb59385a9205a3f --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/SELECTIVE_CHECKS.md b/dev/breeze/SELECTIVE_CHECKS.md index 2f7d9620417c7..545400e8c50cf 100644 --- a/dev/breeze/SELECTIVE_CHECKS.md +++ b/dev/breeze/SELECTIVE_CHECKS.md @@ -74,7 +74,7 @@ together using `pytest-xdist` (pytest-xdist distributes the tests among parallel ## Selective check decision rules * `Full tests` case is enabled when the event is PUSH, or SCHEDULE or we miss commit info or any of the - important environment files (setup.py, setup.cfg, provider.yaml, Dockerfile, build scripts) changed or + important environment files (pyproject.toml, Dockerfile, build scripts) changed or when `full tests needed` label is set. That enables all matrix combinations of variables (representative) and all possible test type. No further checks are performed. * Python, Kubernetes, Backend, Kind, Helm versions are limited to "defaults" only unless `Full tests` mode diff --git a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md index 5cf11fe327929..d91b715da185b 100644 --- a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md +++ b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md @@ -101,7 +101,7 @@ version of Breeze will remain as part of the Airflow's source code. The decision is to implement Breeze in a subfolder (`dev/breeze2/`) of Apache Airflow as a Python project following the standard setuptools enabled project. The project contains setup.py and dependencies described -in setup.cfg and contains both source code and tests for Breeze code. +in pyproject.toml and contains both source code and tests for Breeze code. 
The sub-project could be used in the future to produce a PyPI package (we reserved such package in PyPI), however its main purpose is diff --git a/dev/breeze/pyproject.toml b/dev/breeze/pyproject.toml index aca59140ac51b..1cc3ba39ab806 100644 --- a/dev/breeze/pyproject.toml +++ b/dev/breeze/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "filelock>=3.13.0", "flit>=3.5.0", "gitpython>=3.1.40", + "hatch>=1.9.1", "inputimeout>=1.0.4", "jinja2>=3.1.0", "jsonschema>=4.19.1", diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 981124540fe32..0682728ff4127 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -46,7 +46,6 @@ option_image_tag_for_pulling, option_image_tag_for_verifying, option_install_mysql_client_type, - option_install_providers_from_sources, option_platform_multiple, option_prepare_buildx_cache, option_pull, @@ -304,7 +303,6 @@ def get_exitcode(status: int) -> int: @option_install_mysql_client_type @option_image_tag_for_building @option_include_success_outputs -@option_install_providers_from_sources @option_parallelism @option_platform_multiple @option_prepare_buildx_cache @@ -345,7 +343,6 @@ def build( image_tag: str, include_success_outputs, install_mysql_client_type: str, - install_providers_from_sources: bool, parallelism: int, platform: str | None, prepare_buildx_cache: bool, @@ -417,7 +414,6 @@ def run_build(ci_image_params: BuildCiParams) -> None: github_token=github_token, image_tag=image_tag, install_mysql_client_type=install_mysql_client_type, - install_providers_from_sources=install_providers_from_sources, prepare_buildx_cache=prepare_buildx_cache, push=push, python=python, diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py index d790ae33a172d..97f5f2353887c 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py @@ -58,7 +58,6 @@ "--commit-sha", "--debian-version", "--install-mysql-client-type", - "--install-providers-from-sources", "--python-image", ], }, diff --git a/dev/breeze/src/airflow_breeze/commands/common_image_options.py b/dev/breeze/src/airflow_breeze/commands/common_image_options.py index dd94ffd499a25..1b8f9460949e3 100644 --- a/dev/breeze/src/airflow_breeze/commands/common_image_options.py +++ b/dev/breeze/src/airflow_breeze/commands/common_image_options.py @@ -146,12 +146,6 @@ default=ALLOWED_INSTALL_MYSQL_CLIENT_TYPES[0], envvar="INSTALL_MYSQL_CLIENT_TYPE", ) -option_install_providers_from_sources = click.option( - "--install-providers-from-sources", - help="Install providers from sources when installing.", - is_flag=True, - envvar="INSTALL_PROVIDERS_FROM_SOURCES", -) option_platform_multiple = click.option( "--platform", help="Platform for Airflow image.", diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py index 9c2cb3a57c3a5..1266085c79a5a 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py @@ -41,7 +41,6 @@ option_image_tag_for_pulling, option_image_tag_for_verifying, option_install_mysql_client_type, - option_install_providers_from_sources, option_platform_multiple, 
option_prepare_buildx_cache, option_pull, @@ -233,7 +232,6 @@ def prod_image(): @option_image_tag_for_building @option_include_success_outputs @option_install_mysql_client_type -@option_install_providers_from_sources @option_parallelism @option_platform_multiple @option_prepare_buildx_cache @@ -284,7 +282,6 @@ def build( install_airflow_version: str | None, install_mysql_client_type: str, install_packages_from_context: bool, - install_providers_from_sources: bool, installation_method: str, parallelism: int, platform: str | None, @@ -348,7 +345,6 @@ def run_build(prod_image_params: BuildProdParams) -> None: install_airflow_version=install_airflow_version, install_mysql_client_type=install_mysql_client_type, install_packages_from_context=install_packages_from_context, - install_providers_from_sources=install_providers_from_sources, installation_method=installation_method, prepare_buildx_cache=prepare_buildx_cache, push=push, diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py index 69c7d23c969ac..78b4b9e00d675 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py @@ -57,7 +57,6 @@ "--python-image", "--commit-sha", "--additional-pip-install-flags", - "--install-providers-from-sources", ], }, { diff --git a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py index f88388a22a9f4..02ac5562f9c62 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py +++ b/dev/breeze/src/airflow_breeze/commands/release_candidate_command.py @@ -90,7 +90,7 @@ def tarball_release(version, version_without_rc): def create_artifacts_with_sdist(): - run_command(["python3", "setup.py", "compile_assets", "sdist", "bdist_wheel"], check=True) + run_command(["hatch", "build", "-t", "sdist", "-t", "wheel"], check=True) console_print("Artifacts created") @@ -356,7 +356,7 @@ def publish_release_candidate(version, previous_version, github_token): # Create the artifacts if confirm_action("Use breeze to create artifacts?"): create_artifacts_with_breeze() - elif confirm_action("Use setup.py to create artifacts?"): + elif confirm_action("Use hatch to create artifacts?"): create_artifacts_with_sdist() # Sign the release sign_the_release(airflow_repo_root) diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index d7ae88fd135a2..814cc5432533b 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -700,7 +700,7 @@ def run_generate_constraints_in_parallel( @release_management.command( name="generate-constraints", - help="Generates pinned constraint files with all extras from setup.py in parallel.", + help="Generates pinned constraint files with all extras from pyproject.toml in parallel.", ) @option_python @option_run_in_parallel diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index af03e4533b5f6..84103623c7db0 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -387,8 +387,7 @@ def get_airflow_extras(): # Initialize files for rebuild check FILES_FOR_REBUILD_CHECK 
= [ - "setup.py", - "setup.cfg", + "pyproject.toml", "Dockerfile.ci", ".dockerignore", "generated/provider_dependencies.json", diff --git a/dev/breeze/src/airflow_breeze/params/build_prod_params.py b/dev/breeze/src/airflow_breeze/params/build_prod_params.py index ed0cec9a8cb66..334edaab48503 100644 --- a/dev/breeze/src/airflow_breeze/params/build_prod_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_prod_params.py @@ -212,7 +212,6 @@ def prepare_arguments_for_docker_build_command(self) -> list[str]: self._req_arg("DOCKER_CONTEXT_FILES", self.docker_context_files) self._req_arg("INSTALL_PACKAGES_FROM_CONTEXT", self.install_packages_from_context) self._req_arg("INSTALL_POSTGRES_CLIENT", self.install_postgres_client) - self._req_arg("INSTALL_PROVIDERS_FROM_SOURCES", self.install_providers_from_sources) self._req_arg("PYTHON_BASE_IMAGE", self.python_base_image) # optional build args self._opt_arg("AIRFLOW_CONSTRAINTS_LOCATION", self.airflow_constraints_location) diff --git a/dev/breeze/src/airflow_breeze/params/common_build_params.py b/dev/breeze/src/airflow_breeze/params/common_build_params.py index e1c77990e9f78..421bcc7c3a947 100644 --- a/dev/breeze/src/airflow_breeze/params/common_build_params.py +++ b/dev/breeze/src/airflow_breeze/params/common_build_params.py @@ -63,7 +63,6 @@ class CommonBuildParams: github_repository: str = APACHE_AIRFLOW_GITHUB_REPOSITORY github_token: str = os.environ.get("GITHUB_TOKEN", "") image_tag: str | None = None - install_providers_from_sources: bool = False install_mysql_client_type: str = ALLOWED_INSTALL_MYSQL_CLIENT_TYPES[0] platform: str = DOCKER_DEFAULT_PLATFORM prepare_buildx_cache: bool = False diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index e90d3480464e2..85fbecd6ca8e8 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -162,7 +162,6 @@ class ShellParams: image_tag: str | None = None include_mypy_volume: bool = False install_airflow_version: str = "" - install_providers_from_sources: bool = True install_selected_providers: str | None = None integration: tuple[str, ...] 
= () issue_id: str = "" @@ -526,7 +525,6 @@ def env_variables_for_docker_commands(self) -> dict[str, str]: _set_var(_env, "HOST_USER_ID", self.host_user_id) _set_var(_env, "INIT_SCRIPT_FILE", None, "init.sh") _set_var(_env, "INSTALL_AIRFLOW_VERSION", self.install_airflow_version) - _set_var(_env, "INSTALL_PROVIDERS_FROM_SOURCES", self.install_providers_from_sources) _set_var(_env, "INSTALL_SELECTED_PROVIDERS", self.install_selected_providers) _set_var(_env, "ISSUE_ID", self.issue_id) _set_var(_env, "LOAD_DEFAULT_CONNECTIONS", self.load_default_connections) diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index 2806f0f1e923c..dc6cc861b67e5 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -69,9 +69,9 @@ "check-providers-init-file-missing", "check-providers-subpackages-init-file-exist", "check-pydevd-left-in-code", + "check-pyproject-toml-order", "check-revision-heads-map", "check-safe-filter-usage-in-html", - "check-setup-order", "check-sql-dependency-common-data-structure", "check-start-date-not-used-in-defaults", "check-system-tests-present", diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index 391edb6056bd0..bcba72a01c666 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -656,7 +656,7 @@ def update_release_notes( ) -> tuple[bool, bool]: """Updates generated files. - This includes the readme, changes, and/or setup.cfg/setup.py/manifest.in/provider_info. + This includes the readme, changes, and/or pyproject.toml. :param provider_package_id: id of the package :param reapply_templates_only: regenerate already released documentation only - without updating versions diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 5c8e98056d0ae..4e35cabe3be84 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -65,7 +65,7 @@ # Those are volumes that are mounted when MOUNT_SELECTED is chosen (which is the default when # entering Breeze. MOUNT_SELECTED prevents to mount the files that you can have accidentally added -# in your sources (or they were added automatically by setup.py etc.) to be mounted to container. +# in your sources (or they were added automatically by pyproject.toml) to be mounted to container. 
# This is important to get a "clean" environment for different python versions and to avoid # unnecessary slow-downs when you are mounting files on MacOS (which has very slow filesystem) # Any time you add a top-level folder in airflow that should also be added to container you should @@ -80,7 +80,6 @@ (".rat-excludes", "/opt/airflow/.rat-excludes"), ("BREEZE.rst", "/opt/airflow/BREEZE.rst"), ("LICENSE", "/opt/airflow/LICENSE"), - ("MANIFEST.in", "/opt/airflow/MANIFEST.in"), ("NOTICE", "/opt/airflow/NOTICE"), ("RELEASE_NOTES.rst", "/opt/airflow/RELEASE_NOTES.rst"), ("airflow", "/opt/airflow/airflow"), @@ -95,8 +94,6 @@ ("pyproject.toml", "/opt/airflow/pyproject.toml"), ("scripts", "/opt/airflow/scripts"), ("scripts/docker/entrypoint_ci.sh", "/entrypoint"), - ("setup.cfg", "/opt/airflow/setup.cfg"), - ("setup.py", "/opt/airflow/setup.py"), ("tests", "/opt/airflow/tests"), ("helm_tests", "/opt/airflow/helm_tests"), ("kubernetes_tests", "/opt/airflow/kubernetes_tests"), diff --git a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py index b7002add2ae34..c2daa3d183911 100644 --- a/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/kubernetes_utils.py @@ -293,7 +293,6 @@ def _install_packages_in_k8s_virtualenv(): str(K8S_REQUIREMENTS.resolve()), ] env = os.environ.copy() - env["INSTALL_PROVIDERS_FROM_SOURCES"] = "true" capture_output = True if get_verbose(): capture_output = False diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 49626a4668efe..af637fb085e1c 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -53,8 +53,8 @@ LONG_PROVIDERS_PREFIX = "apache-airflow-providers-" -# TODO: use single source of truth for those -# for now we need to keep them in sync with the ones in setup.py +# TODO(potiuk): use single source of truth for those +# for now we need to keep them in sync with the ones in pyproject.toml PREINSTALLED_PROVIDERS = [ "common.sql", "ftp", diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 21644a1dd9cb8..03876f553ca65 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -34,15 +34,15 @@ from airflow_breeze.utils.reinstall import reinstall_breeze, warn_dependencies_changed, warn_non_editable from airflow_breeze.utils.shared_options import get_verbose, set_forced_answer -AIRFLOW_CFG_FILE = "setup.cfg" +PYPROJECT_TOML = "pyproject.toml" def search_upwards_for_airflow_sources_root(start_from: Path) -> Path | None: root = Path(start_from.root) d = start_from while d != root: - attempt = d / AIRFLOW_CFG_FILE - if attempt.exists() and "name = apache-airflow\n" in attempt.read_text(): + attempt = d / PYPROJECT_TOML + if attempt.exists() and 'name = "apache-airflow"\n' in attempt.read_text(): return attempt.parent d = d.parent return None @@ -97,7 +97,7 @@ def get_package_setup_metadata_hash() -> str: return "NOT FOUND" -def get_sources_setup_metadata_hash(sources: Path) -> str: +def get_pyproject_toml_hash(sources: Path) -> str: try: the_hash = hashlib.new("blake2b") the_hash.update((sources / "dev" / "breeze" / "pyproject.toml").read_bytes()) @@ -108,7 +108,7 @@ def get_sources_setup_metadata_hash(sources: Path) -> str: def get_installation_sources_config_metadata_hash() -> str: """ - Retrieves hash of setup.py 
and setup.cfg files from the source of installation of Breeze. + Retrieves hash of pyproject.toml from the source of installation of Breeze. This is used in order to determine if we need to upgrade Breeze, because some setup files changed. Blake2b algorithm will not be flagged by security checkers @@ -118,14 +118,14 @@ def get_installation_sources_config_metadata_hash() -> str: installation_sources = get_installation_airflow_sources() if installation_sources is None: return "NOT FOUND" - return get_sources_setup_metadata_hash(installation_sources) + return get_pyproject_toml_hash(installation_sources) def get_used_sources_setup_metadata_hash() -> str: """ Retrieves hash of setup files from the currently used sources. """ - return get_sources_setup_metadata_hash(get_used_airflow_sources()) + return get_pyproject_toml_hash(get_used_airflow_sources()) def set_forced_answer_for_upgrade_check(): diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py index 2cf57200a45e2..b08484ef52a0d 100644 --- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py +++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py @@ -113,13 +113,12 @@ def __hash__(self): r"^dev/.*\.py$", r"^Dockerfile", r"^scripts", - r"^setup.py", - r"^setup.cfg", + r"^pyproject.toml", r"^generated/provider_dependencies.json$", ], FileGroupForCi.PYTHON_PRODUCTION_FILES: [ r"^airflow/.*\.py", - r"^setup.py", + r"^pyproject.toml", ], FileGroupForCi.JAVASCRIPT_PRODUCTION_FILES: [ r"^airflow/.*\.[jt]sx?", @@ -141,8 +140,6 @@ def __hash__(self): ], FileGroupForCi.SETUP_FILES: [ r"^pyproject.toml", - r"^setup.cfg", - r"^setup.py", r"^generated/provider_dependencies.json$", ], FileGroupForCi.DOC_FILES: [ diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 495c98fd877a1..e4aa2275f1e5b 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -402,7 +402,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( - ("setup.py",), + ("pyproject.toml",), { "affected-providers-list-as-string": ALL_PROVIDERS_AFFECTED, "all-python-versions": "['3.8', '3.9', '3.10', '3.11']", @@ -421,7 +421,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "parallel-test-types-list-as-string": ALL_CI_SELECTIVE_TEST_TYPES, }, id="Everything should run - including all providers and upgrading to " - "newer requirements as setup.py changed and all Python versions", + "newer requirements as pyproject.toml changed and all Python versions", ) ), ( @@ -1106,20 +1106,12 @@ def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): id="Regular source changed", ), pytest.param( - ("setup.py",), + ("pyproject.toml",), { "upgrade-to-newer-dependencies": "true", }, (), - id="Setup.py changed", - ), - pytest.param( - ("setup.cfg",), - { - "upgrade-to-newer-dependencies": "true", - }, - (), - id="Setup.cfg changed", + id="pyproject.toml changed", ), pytest.param( ("airflow/providers/microsoft/azure/provider.yaml",), diff --git a/MANIFEST.in b/dev/old_setup/MANIFEST.in similarity index 100% rename from MANIFEST.in rename to dev/old_setup/MANIFEST.in diff --git a/dev/sign.sh b/dev/sign.sh index 178ba3ccb7c80..0d4c2861a5c76 100755 --- a/dev/sign.sh +++ b/dev/sign.sh @@ -17,9 +17,7 @@ # under the License. 
set -euo pipefail -# Use this to sign the tar balls generated from -# python setup.py sdist --formats=gztar -# ie. sign.sh +# Use this to sign the tar balls generated via hatch # you will still be required to type in your signing key password # or it needs to be available in your keychain
diff --git a/docker_tests/docker_tests_utils.py b/docker_tests/docker_tests_utils.py index 8d95805f6459c..7eea98e9bd40b 100644 --- a/docker_tests/docker_tests_utils.py +++ b/docker_tests/docker_tests_utils.py @@ -73,7 +73,8 @@ def display_dependency_conflict_message(): It can mean one of those: 1) The main is currently broken (other PRs will fail with the same error)
-2) You changed some dependencies in setup.py or setup.cfg and they are conflicting. +2) You changed some dependencies in pyproject.toml (either manually or automatically by pre-commit) + and they are conflicting.
diff --git a/docs/apache-airflow-providers/howto/create-custom-providers.rst b/docs/apache-airflow-providers/howto/create-custom-providers.rst index 7b2a2fcf4c1dd..daa9430f05859 100644 --- a/docs/apache-airflow-providers/howto/create-custom-providers.rst +++ b/docs/apache-airflow-providers/howto/create-custom-providers.rst @@ -196,8 +196,8 @@ names, so preferably choose package that is in your "domain". You need to do the following to turn an existing Python package into a provider (see below for examples):
-* Add the ``apache_airflow_provider`` entry point in the ``setup.cfg`` - this tells airflow where to get - the required provider metadata +* Add the ``apache_airflow_provider`` entry point in the ``pyproject.toml`` file - this tells airflow + where to get the required provider metadata
* Create the function that you refer to in the first step as part of your package: this functions returns a dictionary that contains all meta-data about your provider package * If you want Airflow to link to documentation of your Provider in the providers page, make sure
@@ -211,16 +211,15 @@ You need to do the following to turn an existing Python package into a provider .. exampleinclude:: /../../airflow/provider_info.schema.json :language: json
-Example ``setup.cfg``: +Example ``pyproject.toml``: -.. code-block:: cfg +.. code-block:: toml
- [options.entry_points] - # the function get_provider_info is defined in myproviderpackage.somemodule - apache_airflow_provider= - provider_info=myproviderpackage.somemodule:get_provider_info
+ [project.entry-points."apache_airflow_provider"] + provider_info = "myproviderpackage.get_provider_info:get_provider_info"
-Example ``myproviderpackage/somemodule.py``: + +Example ``myproviderpackage/get_provider_info.py``: .. code-block:: Python @@ -235,7 +234,6 @@ Example ``myproviderpackage/somemodule.py``: } -
**Is there a convention for a connection id and type?** Very good question. Glad that you asked. We usually follow the convention ``_default`` for connection
diff --git a/docs/apache-airflow/administration-and-deployment/modules_management.rst b/docs/apache-airflow/administration-and-deployment/modules_management.rst index 44b360fbc83ef..a619d2a06ee24 100644 --- a/docs/apache-airflow/administration-and-deployment/modules_management.rst +++ b/docs/apache-airflow/administration-and-deployment/modules_management.rst @@ -331,20 +331,12 @@ packages, so learning how to build your package is handy. Here is how to create your package:
-1. Before starting, install the following packages: +1. Before starting, choose and install the build/packaging tool that you will use, ideally it should be +PEP-621 compliant so that you can easily switch to a different tool later. +Popular choices are setuptools, poetry, hatch and flit.
-``setuptools``: setuptools is a package development process library designed -for creating and distributing Python packages. -
-``wheel``: The wheel package provides a bdist_wheel command for setuptools. It -creates .whl file which is directly installable through the ``pip install`` -command. We can then upload the same file to `PyPI `_. -
-.. code-block:: bash - - pip install --upgrade pip setuptools wheel -
-2. Create the package directory - in our case, we will call it ``airflow_operators``. +2. Decide how you want to create your own package, then create the package directory - in our case, + we will call it ``airflow_operators``.
.. code-block:: bash @@ -358,42 +350,16 @@ command. We can then upload the same file to `PyPI `_. When we import this package, it should print the above message.
-4. Create ``setup.py``: - -.. code-block:: python - - import setuptools - - setuptools.setup( - name="airflow_operators", - packages=setuptools.find_packages(), - ) -
-5. Build the wheel: - -.. code-block:: bash - - python setup.py bdist_wheel
+4. Create ``pyproject.toml`` and fill it with the build tool configuration of your choice. +See `The pyproject.toml specification `__
-This will create a few directories in the project and the overall structure will -look like following: +5. Build your project using the tool of your choice. For example, for hatch it can be:
.. code-block:: bash
- . - ├── airflow_operators - │ ├── __init__.py - ├── airflow_operators.egg-info - │ ├── PKG-INFO - │ ├── SOURCES.txt - │ ├── dependency_links.txt - │ └── top_level.txt - ├── build - │ └── bdist.macosx-10.15-x86_64 - ├── dist - │ └── airflow_operators-0.0.0-py3-none-any.whl - └── setup.py
+ hatch build -t wheel +This will create a .whl file in your ``dist`` folder.
6. Install the .whl file using pip:
diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 082595312edf3..1aae96a097073 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -18,17 +18,24 @@ Reference for package extras ''''''''''''''''''''''''''''
-Here's the list of all the extra dependencies of Apache Airflow.
+Airflow has a number of optional "extras" that you can use to add features to your installation when you +are installing Airflow. Those extras are a good way for users to manage their installation, but they are +also useful for contributors to Airflow when they want to contribute some of the features - including +optional integrations of Airflow - via providers.
-The entries with ``*`` in the ``Preinstalled`` column indicate that those extras (providers) are always -pre-installed when Airflow is installed.
+.. warning:: + + Traditionally in Airflow some of the extras used `.` and `_` to separate the parts of the extra name. + These were not PEP-685 normalized names and we opted to change them to `-` for all our extras, expecting that + PEP-685 will be implemented in full by `pip` and other tools. Currently the normalization is happening + anyway, but `pip` shows a warning when `_` or `.` are used, due to the old packaging version used (January 2023). + The work is in progress to change it in `this issue ` so it + is anticipated that this will be fixed soon. + + TODO(potiuk): decide whether to do it.
In the current proposal we changed everything to `_`. -.. note:: - You can disable automated installation of the providers with extras when installing Airflow. You need to - have ``INSTALL_PROVIDERS_FROM_SOURCES`` environment variable to ``true`` before running ``pip install`` - command. Contributors need to set it, if they are installing Airflow locally, and want to develop - providers directly via Airflow sources. This variable is automatically set in ``Breeze`` - development environment. Setting this variable is not needed in editable mode (``pip install -e``). + +Here's the list of all the extra dependencies of Apache Airflow. Core Airflow extras ------------------- @@ -52,6 +59,8 @@ python dependencies for the provided package. +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | google_auth | ``pip install 'apache-airflow[google_auth]'`` | Google auth backend | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ +| graphviz | ``pip install 'apache-airflow[graphvis]'`` | Enables exporting DAGs to .dot graphical output | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | kerberos | ``pip install 'apache-airflow[kerberos]'`` | Kerberos integration for Kerberized services (Hadoop, Presto, Trino) | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+ | ldap | ``pip install 'apache-airflow[ldap]'`` | LDAP authentication for users | @@ -97,7 +106,7 @@ with a consistent set of dependencies based on constraint files provided by Airf .. code-block:: bash :substitutions: - pip install apache-airflow[google,amazon,apache.spark]==|version| \ + pip install apache-airflow[google,amazon,apache-spark]==|version| \ --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-|version|/constraints-3.8.txt" Note, that this will install providers in the versions that were released at the time of Airflow |version| release. You can later @@ -114,40 +123,39 @@ custom bash/python providers). 
+---------------------+-----------------------------------------------------+------------------------------------------------+ | extra | install command | enables | +=====================+=====================================================+================================================+ -| apache.atlas | ``pip install 'apache-airflow[apache.atlas]'`` | Apache Atlas | +| apache_atlas | ``pip install 'apache-airflow[apache_atlas]'`` | Apache Atlas | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.beam | ``pip install 'apache-airflow[apache.beam]'`` | Apache Beam operators & hooks | +| apache_beam | ``pip install 'apache-airflow[apache_beam]'`` | Apache Beam operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.cassandra | ``pip install 'apache-airflow[apache.cassandra]'`` | Cassandra related operators & hooks | +| apache_cassandra | ``pip install 'apache-airflow[apache_cassandra]'`` | Cassandra related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.drill | ``pip install 'apache-airflow[apache.drill]'`` | Drill related operators & hooks | +| apache_drill | ``pip install 'apache-airflow[apache_drill]'`` | Drill related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.druid | ``pip install 'apache-airflow[apache.druid]'`` | Druid related operators & hooks | +| apache_druid | ``pip install 'apache-airflow[apache_druid]'`` | Druid related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.flink | ``pip install 'apache-airflow[apache.flink]'`` | Flink related operators & hooks | +| apache_flink | ``pip install 'apache-airflow[apache_flink]'`` | Flink related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.hdfs | ``pip install 'apache-airflow[apache.hdfs]'`` | HDFS hooks and operators | +| apache_hdfs | ``pip install 'apache-airflow[apache_hdfs]'`` | HDFS hooks and operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.hive | ``pip install 'apache-airflow[apache.hive]'`` | All Hive related operators | +| apache_hive | ``pip install 'apache-airflow[apache_hive]'`` | All Hive related operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.impala | ``pip install 'apache-airflow[apache.impala]'`` | All Impala related operators & hooks | +| apache_impala | ``pip install 'apache-airflow[apache_impala]'`` | All Impala related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.kafka | ``pip install 'apache-airflow[apache.kafka]'`` | All Kafka related operators & hooks | +| apache_kafka | ``pip install 'apache-airflow[apache_kafka]'`` | All Kafka related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| 
apache.kylin | ``pip install 'apache-airflow[apache.kylin]'`` | All Kylin related operators & hooks | +| apache_kylin | ``pip install 'apache-airflow[apache_kylin]'`` | All Kylin related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.livy | ``pip install 'apache-airflow[apache.livy]'`` | All Livy related operators, hooks & sensors | +| apache_livy | ``pip install 'apache-airflow[apache_livy]'`` | All Livy related operators, hooks & sensors | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.pig | ``pip install 'apache-airflow[apache.pig]'`` | All Pig related operators & hooks | +| apache_pig | ``pip install 'apache-airflow[apache_pig]'`` | All Pig related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.pinot | ``pip install 'apache-airflow[apache.pinot]'`` | All Pinot related hooks | +| apache_pinot | ``pip install 'apache-airflow[apache_pinot]'`` | All Pinot related hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.spark | ``pip install 'apache-airflow[apache.spark]'`` | All Spark related operators & hooks | +| apache_spark | ``pip install 'apache-airflow[apache_spark]'`` | All Spark related operators & hooks | +---------------------+-----------------------------------------------------+------------------------------------------------+ -| apache.webhdfs | ``pip install 'apache-airflow[apache.webhdfs]'`` | HDFS hooks and operators | +| apache_webhdfs | ``pip install 'apache-airflow[apache_webhdfs]'`` | HDFS hooks and operators | +---------------------+-----------------------------------------------------+------------------------------------------------+ - External Services extras ======================== @@ -166,9 +174,9 @@ These are extras that add dependencies needed for integration with external serv +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | asana | ``pip install 'apache-airflow[asana]'`` | Asana hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| atlassian.jira | ``pip install 'apache-airflow[atlassian.jira]'`` | Jira hooks and operators | +| atlassian_jira | ``pip install 'apache-airflow[atlassian_jira]'`` | Jira hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| azure | ``pip install 'apache-airflow[microsoft.azure]'`` | Microsoft Azure | +| microsoft_azure | ``pip install 'apache-airflow[microsoft_azure]'`` | Microsoft Azure | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | cloudant | ``pip install 'apache-airflow[cloudant]'`` | Cloudant hook | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ @@ -178,7 +186,7 @@ These are extras that add dependencies needed for integration with external serv +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | 
datadog | ``pip install 'apache-airflow[datadog]'`` | Datadog hooks and sensors | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ -| dbt.cloud | ``pip install 'apache-airflow[dbt.cloud]'`` | dbt Cloud hooks and operators | +| dbt_cloud | ``pip install 'apache-airflow[dbt_cloud]'`` | dbt Cloud hooks and operators | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | dingding | ``pip install 'apache-airflow[dingding]'`` | Dingding hooks and sensors | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ @@ -241,7 +249,7 @@ Some of those enable Airflow to use executors to run tasks with them - other tha +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | celery | ``pip install 'apache-airflow[celery]'`` | Celery dependencies and sensor | CeleryExecutor, CeleryKubernetesExecutor | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ -| cncf.kubernetes | ``pip install 'apache-airflow[cncf.kubernetes]'`` | Kubernetes client libraries, KubernetesPodOperator & friends | KubernetesExecutor, LocalKubernetesExecutor | +| cncf_kubernetes | ``pip install 'apache-airflow[cncf_kubernetes]'`` | Kubernetes client libraries, KubernetesPodOperator & friends | KubernetesExecutor, LocalKubernetesExecutor | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | docker | ``pip install 'apache-airflow[docker]'`` | Docker hooks and operators | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ @@ -259,7 +267,7 @@ Some of those enable Airflow to use executors to run tasks with them - other tha +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | mongo | ``pip install 'apache-airflow[mongo]'`` | Mongo hooks and operators | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ -| microsoft.mssql | ``pip install 'apache-airflow[microsoft.mssql]'`` | Microsoft SQL Server operators and hook. | | +| microsoft_mssql | ``pip install 'apache-airflow[microsoft_mssql]'`` | Microsoft SQL Server operators and hook. 
| | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ | mysql | ``pip install 'apache-airflow[mysql]'`` | MySQL operators and hook | | +---------------------+-----------------------------------------------------+-----------------------------------------------------------------+----------------------------------------------+ @@ -290,12 +298,16 @@ Other extras These are extras that provide support for integration with external systems via some - usually - standard protocols. +The entries with ``*`` in the ``Preinstalled`` column indicate that those extras (providers) are always +pre-installed when Airflow is installed. + + +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | extra | install command | enables | Preinstalled | +=====================+=====================================================+======================================+==============+ -| common.io | ``pip install 'apache-airflow[common.io]'`` | Core IO Operators | | +| common_io | ``pip install 'apache-airflow[common_io]'`` | Core IO Operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| common.sql | ``pip install 'apache-airflow[common.sql]'`` | Core SQL Operators | * | +| common_sql | ``pip install 'apache-airflow[common_sql]'`` | Core SQL Operators | * | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | ftp | ``pip install 'apache-airflow[ftp]'`` | FTP hooks and operators | * | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ @@ -307,9 +319,9 @@ These are extras that provide support for integration with external systems via +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | jdbc | ``pip install 'apache-airflow[jdbc]'`` | JDBC hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| microsoft.psrp | ``pip install 'apache-airflow[microsoft.psrp]'`` | PSRP hooks and operators | | +| microsoft_psrp | ``pip install 'apache-airflow[microsoft_psrp]'`` | PSRP hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ -| microsoft.winrm | ``pip install 'apache-airflow[microsoft.winrm]'`` | WinRM hooks and operators | | +| microsoft_winrm | ``pip install 'apache-airflow[microsoft_winrm]'`` | WinRM hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ | openlineage | ``pip install 'apache-airflow[openlineage]'`` | Sending OpenLineage events | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ @@ -326,15 +338,10 @@ These are extras that provide support for integration with external systems via | ssh | ``pip install 'apache-airflow[ssh]'`` | SSH hooks and operators | | +---------------------+-----------------------------------------------------+--------------------------------------+--------------+ 
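For example, the providers marked with ``*`` above are already present after a plain Airflow installation, while the remaining ones are only pulled in when you request their extras explicitly. A minimal sketch (the version and constraints URL below are illustrative, substitute the ones you actually use):

.. code-block:: bash

    # Preinstalled providers (ftp, http, imap, sqlite, common_sql, ...) ship with the base package
    pip install "apache-airflow==2.9.0" \
        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.8.txt"

    # Providers that are not preinstalled are added only via their extras
    pip install "apache-airflow[microsoft_psrp,openlineage]==2.9.0" \
        --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.8.txt"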
-Bundle extras -------------- +Production Bundle extras +------------------------- -These are extras that install one or more extras as a bundle. Note that these extras should only be used for "development" version -of Airflow - i.e. when Airflow is installed from sources. Because of the way how bundle extras are constructed they might not -work when airflow is installed from 'PyPI`. - -If you want to install Airflow from PyPI with "all" extras (which should basically be never needed - you almost never need all extras from Airflow), -you need to list explicitly all the non-bundle extras that you want to install. +These are extras that install one or more extras as a bundle. +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ | extra | install command | enables | @@ -343,27 +350,278 @@ you need to list explicitly all the non-bundle extras that you want to install. +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ | all_dbs | ``pip install 'apache-airflow[all_dbs]'`` | All database integrations | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel | ``pip install 'apache-airflow[devel]'`` | Minimum development dependencies (without Hadoop, Kerberos, providers) | -+---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_hadoop | ``pip install 'apache-airflow[devel_hadoop]'`` | Adds Hadoop stack libraries to ``devel`` dependencies | + +Development extras +------------------ + +Generally none of the ``devel`` extras install providers - they expect the providers to be used from sources +and those extras only make sense in editable mode. Users of Airflow should not use them unless they +start contributing back and install Airflow from sources. + +Those extras are only available in Airflow when it is installed in editable mode from sources +(``pip install -e .``). + +Devel extras +============ + +The devel extras do not install dependencies for features of Airflow, but add functionality that is needed to +develop Airflow, such as running tests and static checks. Even if some ``devel`` extras relate to providers +(for example ``devel_amazon``), they do not install the corresponding provider packages - but they might be +needed if you want to test the code of that provider (for example to run mypy checks or its tests).
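For example, a contributor working on the amazon provider from a local source checkout would typically combine the relevant ``devel`` and ``editable`` extras (described in the tables below) in a single editable install. This is only a sketch, run from the root of the Airflow source tree, and the exact set of extras depends on what you want to test:

.. code-block:: bash

    # Editable install of Airflow plus the common test tooling and the amazon test/provider dependencies
    pip install -e ".[devel,devel_amazon,editable_amazon]"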
+ ++---------------------+-----------------------------------------+------------------------------------------------------+ +| extra | install command | enables | ++=====================+=========================================+======================================================+ +| devel_amazon | pip install -e '.[devel_amazon]' | Adds all test libraries needed to test amazon | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_azure | pip install -e '.[devel_azure]' | Adds all test libraries needed to test azure | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_breeze | pip install -e '.[devel_breeze]' | Adds all test libraries needed to test breeze | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_debuggers | pip install -e '.[devel_debuggers]' | Adds all test libraries needed to test debuggers | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_deltalake | pip install -e '.[devel_deltalake]' | Adds all test libraries needed to test deltalake | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_devscripts | pip install -e '.[devel_devscripts]' | Adds all test libraries needed to test devscripts | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_duckdb | pip install -e '.[devel_duckdb]' | Adds all test libraries needed to test duckdb | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_iceberg | pip install -e '.[devel_iceberg]' | Adds all test libraries needed to test iceberg | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_mongo | pip install -e '.[devel_mongo]' | Adds all test libraries needed to test mongo | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_mypy | pip install -e '.[devel_mypy]' | Adds all test libraries needed to test mypy | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_sentry | pip install -e '.[devel_sentry]' | Adds all test libraries needed to test sentry | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_static_checks | pip install -e '.[devel_static_checks]' | Adds all test libraries needed to test static_checks | ++---------------------+-----------------------------------------+------------------------------------------------------+ +| devel_tests | pip install -e '.[devel_tests]' | Adds all test libraries needed to test tests | ++---------------------+-----------------------------------------+------------------------------------------------------+ + +Editable provider extras +======================== + +In order to test providers when installing Airflow in editable, development mode, you need to install +dependencies of the providers. This is done by installing the ``editable`` extra with ``pip install -e``. 
+Those extras are not available in the released PyPI wheel packages, they are only available when Airflow +is installed locally in editable mode. + ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| extra | install command | enables | ++=================================+=====================================================+============================================================+ +| editable_airbyte | pip install -e '.[editable_airbyte]' | Adds all libraries needed by the airbyte provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_alibaba | pip install -e '.[editable_alibaba]' | Adds all libraries needed by the alibaba provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_amazon | pip install -e '.[editable_amazon]' | Adds all libraries needed by the amazon provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_beam | pip install -e '.[editable_apache_beam]' | Adds all libraries needed by the apache_beam provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_cassandra | pip install -e '.[editable_apache_cassandra]' | Adds all libraries needed by the apache_cassandra provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_drill | pip install -e '.[editable_apache_drill]' | Adds all libraries needed by the apache_drill provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_druid | pip install -e '.[editable_apache_druid]' | Adds all libraries needed by the apache_druid provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_flink | pip install -e '.[editable_apache_flink]' | Adds all libraries needed by the apache_flink provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_hdfs | pip install -e '.[editable_apache_hdfs]' | Adds all libraries needed by the apache_hdfs provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_hive | pip install -e '.[editable_apache_hive]' | Adds all libraries needed by the apache_hive provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_impala | pip install -e '.[editable_apache_impala]' | Adds all libraries needed by the apache_impala provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_kafka 
| pip install -e '.[editable_apache_kafka]' | Adds all libraries needed by the apache_kafka provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_kylin | pip install -e '.[editable_apache_kylin]' | Adds all libraries needed by the apache_kylin provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_livy | pip install -e '.[editable_apache_livy]' | Adds all libraries needed by the apache_livy provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_pig | pip install -e '.[editable_apache_pig]' | Adds all libraries needed by the apache_pig provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_pinot | pip install -e '.[editable_apache_pinot]' | Adds all libraries needed by the apache_pinot provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apache_spark | pip install -e '.[editable_apache_spark]' | Adds all libraries needed by the apache_spark provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_apprise | pip install -e '.[editable_apprise]' | Adds all libraries needed by the apprise provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_arangodb | pip install -e '.[editable_arangodb]' | Adds all libraries needed by the arangodb provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_asana | pip install -e '.[editable_asana]' | Adds all libraries needed by the asana provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_atlassian_jira | pip install -e '.[editable_atlassian_jira]' | Adds all libraries needed by the atlassian_jira provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_celery | pip install -e '.[editable_celery]' | Adds all libraries needed by the celery provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_cloudant | pip install -e '.[editable_cloudant]' | Adds all libraries needed by the cloudant provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_cncf_kubernetes | pip install -e '.[editable_cncf_kubernetes]' | Adds all libraries needed by the cncf_kubernetes provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| 
editable_cohere | pip install -e '.[editable_cohere]' | Adds all libraries needed by the cohere provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_common_io | pip install -e '.[editable_common_io]' | Adds all libraries needed by the common_io provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_common_sql | pip install -e '.[editable_common_sql]' | Adds all libraries needed by the common_sql provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_databricks | pip install -e '.[editable_databricks]' | Adds all libraries needed by the databricks provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_datadog | pip install -e '.[editable_datadog]' | Adds all libraries needed by the datadog provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_dbt_cloud | pip install -e '.[editable_dbt_cloud]' | Adds all libraries needed by the dbt_cloud provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_dingding | pip install -e '.[editable_dingding]' | Adds all libraries needed by the dingding provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_discord | pip install -e '.[editable_discord]' | Adds all libraries needed by the discord provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_docker | pip install -e '.[editable_docker]' | Adds all libraries needed by the docker provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_elasticsearch | pip install -e '.[editable_elasticsearch]' | Adds all libraries needed by the elasticsearch provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_exasol | pip install -e '.[editable_exasol]' | Adds all libraries needed by the exasol provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_fab | pip install -e '.[editable_fab]' | Adds all libraries needed by the fab provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_facebook | pip install -e '.[editable_facebook]' | Adds all libraries needed by the facebook provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_ftp | pip install -e '.[editable_ftp]' | Adds all libraries needed 
by the ftp provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_github | pip install -e '.[editable_github]' | Adds all libraries needed by the github provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_google | pip install -e '.[editable_google]' | Adds all libraries needed by the google provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_grpc | pip install -e '.[editable_grpc]' | Adds all libraries needed by the grpc provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_hashicorp | pip install -e '.[editable_hashicorp]' | Adds all libraries needed by the hashicorp provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_http | pip install -e '.[editable_http]' | Adds all libraries needed by the http provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_imap | pip install -e '.[editable_imap]' | Adds all libraries needed by the imap provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_influxdb | pip install -e '.[editable_influxdb]' | Adds all libraries needed by the influxdb provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_jdbc | pip install -e '.[editable_jdbc]' | Adds all libraries needed by the jdbc provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_jenkins | pip install -e '.[editable_jenkins]' | Adds all libraries needed by the jenkins provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_microsoft_azure | pip install -e '.[editable_microsoft_azure]' | Adds all libraries needed by the microsoft_azure provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_microsoft_mssql | pip install -e '.[editable_microsoft_mssql]' | Adds all libraries needed by the microsoft_mssql provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_microsoft_psrp | pip install -e '.[editable_microsoft_psrp]' | Adds all libraries needed by the microsoft_psrp provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_microsoft_winrm | pip install -e '.[editable_microsoft_winrm]' | Adds all libraries needed by the microsoft_winrm provider | 
++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_mongo | pip install -e '.[editable_mongo]' | Adds all libraries needed by the mongo provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_mysql | pip install -e '.[editable_mysql]' | Adds all libraries needed by the mysql provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_neo4j | pip install -e '.[editable_neo4j]' | Adds all libraries needed by the neo4j provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_odbc | pip install -e '.[editable_odbc]' | Adds all libraries needed by the odbc provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_openai | pip install -e '.[editable_openai]' | Adds all libraries needed by the openai provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_openfaas | pip install -e '.[editable_openfaas]' | Adds all libraries needed by the openfaas provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_openlineage | pip install -e '.[editable_openlineage]' | Adds all libraries needed by the openlineage provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_opensearch | pip install -e '.[editable_opensearch]' | Adds all libraries needed by the opensearch provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_opsgenie | pip install -e '.[editable_opsgenie]' | Adds all libraries needed by the opsgenie provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_oracle | pip install -e '.[editable_oracle]' | Adds all libraries needed by the oracle provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_pagerduty | pip install -e '.[editable_pagerduty]' | Adds all libraries needed by the pagerduty provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_papermill | pip install -e '.[editable_papermill]' | Adds all libraries needed by the papermill provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_pgvector | pip install -e '.[editable_pgvector]' | Adds all libraries needed by the pgvector provider | 
++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_pinecone | pip install -e '.[editable_pinecone]' | Adds all libraries needed by the pinecone provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_postgres | pip install -e '.[editable_postgres]' | Adds all libraries needed by the postgres provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_presto | pip install -e '.[editable_presto]' | Adds all libraries needed by the presto provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_redis | pip install -e '.[editable_redis]' | Adds all libraries needed by the redis provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_salesforce | pip install -e '.[editable_salesforce]' | Adds all libraries needed by the salesforce provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_samba | pip install -e '.[editable_samba]' | Adds all libraries needed by the samba provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_segment | pip install -e '.[editable_segment]' | Adds all libraries needed by the segment provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_sendgrid | pip install -e '.[editable_sendgrid]' | Adds all libraries needed by the sendgrid provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_sftp | pip install -e '.[editable_sftp]' | Adds all libraries needed by the sftp provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_singularity | pip install -e '.[editable_singularity]' | Adds all libraries needed by the singularity provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_slack | pip install -e '.[editable_slack]' | Adds all libraries needed by the slack provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_smtp | pip install -e '.[editable_smtp]' | Adds all libraries needed by the smtp provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_snowflake | pip install -e '.[editable_snowflake]' | Adds all libraries needed by the snowflake provider | 
++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_sqlite | pip install -e '.[editable_sqlite]' | Adds all libraries needed by the sqlite provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_ssh | pip install -e '.[editable_ssh]' | Adds all libraries needed by the ssh provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_tableau | pip install -e '.[editable_tableau]' | Adds all libraries needed by the tableau provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_tabular | pip install -e '.[editable_tabular]' | Adds all libraries needed by the tabular provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_telegram | pip install -e '.[editable_telegram]' | Adds all libraries needed by the telegram provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_trino | pip install -e '.[editable_trino]' | Adds all libraries needed by the trino provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_vertica | pip install -e '.[editable_vertica]' | Adds all libraries needed by the vertica provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_weaviate | pip install -e '.[editable_weaviate]' | Adds all libraries needed by the weaviate provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_yandex | pip install -e '.[editable_yandex]' | Adds all libraries needed by the yandex provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ +| editable_zendesk | pip install -e '.[editable_zendesk]' | Adds all libraries needed by the zendesk provider | ++---------------------------------+-----------------------------------------------------+------------------------------------------------------------+ + +Doc extras +========== + +Those are the extras that are needed to generated documentation for Airflow. 
This is used for development time only + +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_all | ``pip install 'apache-airflow[devel_all]'`` | Everything needed for development including Hadoop and providers | +| extra | install command | enables | ++=====================+=====================================================+========================================================================+ +| doc | ``pip install -e '.[doc]'`` | Packages needed to build docs (included in ``devel``) | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -| devel_ci | ``pip install 'apache-airflow[devel_ci]'`` | All dependencies required for CI tests (same as ``devel_all``) | +| doc_gen | ``pip install -e '.[doc_gen]'`` | Packages needed to generate er diagrams (included in ``devel_all``) | +---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ -Doc extras ----------- +Bundle devel extras +=================== -Those are the extras that are needed to generated documentation for Airflow. This is used for development time only +Those are extras that bundle devel, editable and doc extras together to make it easy to install them together in a single installation. Some of the +extras are more difficult to install on certain systems (such as ARM MacBooks) because they require system level dependencies to be installed. -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| extra | install command | enables | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| doc | ``pip install 'apache-airflow[doc]'`` | Packages needed to build docs (included in ``devel``) | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| doc_gen | ``pip install 'apache-airflow[doc_gen]'`` | Packages needed to generate er diagrams (included in ``devel_all``) | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| extra | install command | enables | ++=====================+=====================================================+========================================================================+ +| devel | ``pip install -e '.[devel]'`` | Minimum development dependencies (without Hadoop, Kerberos, providers) | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel_hadoop | ``pip install -e '.[devel_hadoop]'`` | Adds Hadoop stack libraries ``devel`` dependencies | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel_all_dbs | ``pip install -e '.[devel_all_dbs]'`` | Adds all libraries (editable extras) needed to test database providers | 
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel_all | ``pip install -e '.[devel_all]'`` | Everything needed for development including Hadoop, all devel extras, | +| | | all doc extras, all editable extras. Generally: all dependencies | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ +| devel_ci | ``pip install -e '.[devel_ci]'`` | All dependencies required for CI tests (same as ``devel_all``) | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------+ Deprecated 1.10 extras @@ -378,37 +636,37 @@ so there is no replacement for ``crypto`` extra. +---------------------+-----------------------------+ | Deprecated extra | Extra to be used instead | +=====================+=============================+ -| atlas | apache.atlas | +| atlas | apache_atlas | +---------------------+-----------------------------+ | aws | amazon | +---------------------+-----------------------------+ -| azure | microsoft.azure | +| azure | microsoft_azure | +---------------------+-----------------------------+ -| cassandra | apache.cassandra | +| cassandra | apache_cassandra | +---------------------+-----------------------------+ | crypto | | +---------------------+-----------------------------+ -| druid | apache.druid | +| druid | apache_druid | +---------------------+-----------------------------+ | gcp | google | +---------------------+-----------------------------+ | gcp_api | google | +---------------------+-----------------------------+ -| hdfs | apache.hdfs | +| hdfs | apache_hdfs | +---------------------+-----------------------------+ -| hive | apache.hive | +| hive | apache_hive | +---------------------+-----------------------------+ -| kubernetes | cncf.kubernetes | +| kubernetes | cncf_kubernetes | +---------------------+-----------------------------+ -| mssql | microsoft.mssql | +| mssql | microsoft_mssql | +---------------------+-----------------------------+ -| pinot | apache.pinot | +| pinot | apache_pinot | +---------------------+-----------------------------+ | s3 | amazon | +---------------------+-----------------------------+ -| spark | apache.spark | +| spark | apache_spark | +---------------------+-----------------------------+ -| webhdfs | apache.webhdfs | +| webhdfs | apache_webhdfs | +---------------------+-----------------------------+ -| winrm | microsoft.winrm | +| winrm | microsoft_winrm | +---------------------+-----------------------------+ diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst b/docs/apache-airflow/installation/installing-from-pypi.rst index ed345f0cc1f38..6c379f6639d38 100644 --- a/docs/apache-airflow/installation/installing-from-pypi.rst +++ b/docs/apache-airflow/installation/installing-from-pypi.rst @@ -84,9 +84,9 @@ Airflow™ installation can be tricky because Airflow is both a library and an a Libraries usually keep their dependencies open and applications usually pin them, but we should do neither and both at the same time. We decided to keep our dependencies as open as possible -(in ``setup.cfg`` and ``setup.py``) so users can install different -version of libraries if needed. This means that from time to time plain ``pip install apache-airflow`` will -not work or will produce an unusable Airflow installation. 
+(in ``pyproject.toml``) so users can install different version of libraries if needed. This means that +from time to time plain ``pip install apache-airflow`` will not work or will produce an unusable +Airflow installation. Reproducible Airflow installation ================================= diff --git a/docs/docker-stack/build-arg-ref.rst b/docs/docker-stack/build-arg-ref.rst index a07760558eed2..7a9ae11d07cbd 100644 --- a/docs/docker-stack/build-arg-ref.rst +++ b/docs/docker-stack/build-arg-ref.rst @@ -224,12 +224,6 @@ You can see some examples of those in: | | | for Airflow version installation - for | | | | example ``<2.0.2`` for automated builds. | +------------------------------------+------------------------------------------+------------------------------------------+ -| ``INSTALL_PROVIDERS_FROM_SOURCES`` | ``false`` | If set to ``true`` and image is built | -| | | from sources, all provider packages are | -| | | installed from sources rather than from | -| | | packages. It has no effect when | -| | | installing from PyPI or GitHub repo. | -+------------------------------------+------------------------------------------+------------------------------------------+ | ``AIRFLOW_CONSTRAINTS_LOCATION`` | | If not empty, it will override the | | | | source of the constraints with the | | | | specified URL or file. Note that the | @@ -259,7 +253,7 @@ Pre-caching PIP dependencies ............................ When image is build from PIP, by default pre-caching of PIP dependencies is used. This is in order to speed-up incremental -builds during development. When pre-cached PIP dependencies are used and ``setup.py`` or ``setup.cfg`` changes, the +builds during development. When pre-cached PIP dependencies are used and ``pyproject.toml`` changes, the PIP dependencies are already pre-installed, thus resulting in much faster image rebuild. This is purely an optimization of time needed to build the images and should be disabled if you want to install Airflow from Docker context files. diff --git a/generated/PYPI_README.md b/generated/PYPI_README.md index 105bf11f67789..608cab6392589 100644 --- a/generated/PYPI_README.md +++ b/generated/PYPI_README.md @@ -98,7 +98,7 @@ Documentation for dependent projects like provider packages, Docker image, Helm We publish Apache Airflow as `apache-airflow` package in PyPI. Installing it however might be sometimes tricky because Airflow is a bit of both a library and application. Libraries usually keep their dependencies open, and applications usually pin them, but we should do neither and both simultaneously. We decided to keep -our dependencies as open as possible (in `setup.py`) so users can install different versions of libraries +our dependencies as open as possible (in `pyproject.toml`) so users can install different versions of libraries if needed. This means that `pip install apache-airflow` will not work from time to time or will produce unusable Airflow installation. diff --git a/hatch_build.py b/hatch_build.py new file mode 100644 index 0000000000000..bc6e4d1b878db --- /dev/null +++ b/hatch_build.py @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +import os +from pathlib import Path +from subprocess import run +from typing import Any, Callable, Iterable + +from hatchling.builders.config import BuilderConfig +from hatchling.builders.hooks.plugin.interface import BuildHookInterface +from hatchling.builders.plugin.interface import BuilderInterface +from hatchling.plugin.manager import PluginManager + +log = logging.getLogger(__name__) +log_level = logging.getLevelName(os.getenv("CUSTOM_AIRFLOW_BUILD_LOG_LEVEL", "INFO")) +log.setLevel(log_level) + + +class CustomBuild(BuilderInterface[BuilderConfig, PluginManager]): + """Custom build class for Airflow assets and git version.""" + + PLUGIN_NAME = "custom" + + def clean(self, directory: str, versions: Iterable[str]) -> None: + work_dir = Path(self.root) + commands = [ + ["rm -rf airflow/www/static/dist"], + ["rm -rf airflow/www/node_modules"], + ] + for cmd in commands: + run(cmd, cwd=work_dir.as_posix(), check=True, shell=True) + + def get_version_api(self) -> dict[str, Callable[..., str]]: + """Custom build target for standard package preparation.""" + return {"standard": self.build_standard} + + def build_standard(self, directory: str, artifacts: Any, **build_data: Any) -> str: + self.write_git_version() + work_dir = Path(self.root) + commands = [ + ["pre-commit run --hook-stage manual compile-www-assets --all-files"], + ] + for cmd in commands: + run(cmd, cwd=work_dir.as_posix(), check=True, shell=True) + dist_path = work_dir / "airflow" / "www" / "static" / "dist" + return dist_path.resolve().as_posix() + + def get_git_version(self) -> str: + """ + Return a version to identify the state of the underlying git repo. + + The version will indicate whether the head of the current git-backed working directory + is tied to a release tag or not. It will indicate the former with a 'release:{version}' + prefix and the latter with a '.dev0' suffix. Following the prefix will be a sha of the + current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted + changes are present. + + Example pre-release version: ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65". + Example release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65". + Example modified release version: ".release+2f635dc265e78db6708f59f68e8009abb92c1e65".dirty + + :return: Found Airflow version in Git repo. 
+ """ + try: + import git + + try: + repo = git.Repo(str(Path(self.root) / ".git")) + except git.NoSuchPathError: + log.warning(".git directory not found: Cannot compute the git version") + return "" + except git.InvalidGitRepositoryError: + log.warning("Invalid .git directory not found: Cannot compute the git version") + return "" + except ImportError: + log.warning("gitpython not found: Cannot compute the git version.") + return "" + if repo: + sha = repo.head.commit.hexsha + if repo.is_dirty(): + return f".dev0+{sha}.dirty" + # commit is clean + return f".release:{sha}" + return "no_git_version" + + def write_git_version(self) -> None: + """Write git version to git_version file.""" + version = self.get_git_version() + git_version_file = Path(self.root) / "airflow" / "git_version" + self.app.display(f"Writing version {version} to {git_version_file}") + git_version_file.write_text(version) + + +# List of pre-installed providers that are dynamically added to generated standard wheel packages +# That are released in PyPI. Those packages are not present in pyproject.toml as dependencies, and +# they are not installed when you install Airflow for editable installation for development. +# This way, when you develop Airflow you can work on Airflow and Providers together from the same +# Source tree - without polluting your editable installation with installed provider packages. +PREINSTALLED_PROVIDERS = [ + "apache-airflow-providers-http", + "apache-airflow-providers-common-io", + "apache-airflow-providers-common-sql", + "apache-airflow-providers-ftp", + "apache-airflow-providers-http", + "apache-airflow-providers-imap", + "apache-airflow-providers-sqlite", +] + + +class CustomBuildHook(BuildHookInterface[BuilderConfig]): + """Custom build hook for Airflow - remove devel extras and adds preinstalled providers.""" + + def initialize(self, version: str, build_data: dict[str, Any]) -> None: + """ + This occurs immediately before each build. + + Any modifications to the build data will be seen by the build target. + """ + if version == "standard": + # remove devel dependencies from optional dependencies for standard packages + self.metadata.core._optional_dependencies = { + key: value + for (key, value) in self.metadata.core.optional_dependencies.items() + if not key.startswith("devel") and key not in ["doc", "doc_gen"] + } + # Inject preinstalled providers into the dependencies for standard packages + for provider in PREINSTALLED_PROVIDERS: + self.metadata.core._dependencies.append(provider) diff --git a/images/breeze/output_ci-image_build.svg b/images/breeze/output_ci-image_build.svg index 70960aa673af1..b6d63b0465846 100644 --- a/images/breeze/output_ci-image_build.svg +++ b/images/breeze/output_ci-image_build.svg @@ -1,4 +1,4 @@ - +
+ # Docutils 0.17.0 converts generated <div class="section"> into <section> and breaks our doc formatting + # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle + # <section>
tags for sections + "docutils<0.17.0", + "sphinx-airflow-theme", + "sphinx-argparse>=0.1.13", + # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 + # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we are stuck on 5.x + "sphinx-autoapi>=2.0.0", + "sphinx-copybutton", + "sphinx-design>=0.5.0", + "sphinx-jinja>=2.0", + "sphinx-rtd-theme>=0.1.6", + "sphinx>=5.2.0", + "sphinxcontrib-httpdomain>=1.7.0", + "sphinxcontrib-redoc>=1.6.0", + "sphinxcontrib-spelling>=7.3", +] +doc_gen = [ + "apache-airflow[doc]", + "eralchemy2", +] +# END OF doc extras +# START OF bundle extras +all_dbs = [ + "apache-airflow[apache_cassandra]", + "apache-airflow[apache_drill]", + "apache-airflow[apache_druid]", + "apache-airflow[apache_hdfs]", + "apache-airflow[apache_hive]", + "apache-airflow[apache_impala]", + "apache-airflow[apache_pinot]", + "apache-airflow[arangodb]", + "apache-airflow[cloudant]", + "apache-airflow[databricks]", + "apache-airflow[exasol]", + "apache-airflow[influxdb]", + "apache-airflow[microsoft_mssql]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[trino]", + "apache-airflow[vertica]", +] +devel = [ + "apache-airflow[aiobotocore]", + "apache-airflow[devel_mypy]", + "apache-airflow[doc]", + "apache-airflow[editable_cncf_kubernetes]", + "apache-airflow[pandas]", + "apache-airflow[password]", + "apache-airflow[s3fs]", +] +devel_all_dbs = [ + "apache-airflow[editable_apache_cassandra]", + "apache-airflow[editable_apache_drill]", + "apache-airflow[editable_apache_druid]", + "apache-airflow[editable_apache_hdfs]", + "apache-airflow[editable_apache_hive]", + "apache-airflow[editable_apache_impala]", + "apache-airflow[editable_apache_pinot]", + "apache-airflow[editable_arangodb]", + "apache-airflow[editable_cloudant]", + "apache-airflow[editable_databricks]", + "apache-airflow[editable_exasol]", + "apache-airflow[editable_influxdb]", + "apache-airflow[editable_microsoft_mssql]", + "apache-airflow[editable_mongo]", + "apache-airflow[editable_mysql]", + "apache-airflow[editable_neo4j]", + "apache-airflow[editable_postgres]", + "apache-airflow[editable_presto]", + "apache-airflow[editable_trino]", + "apache-airflow[editable_vertica]", +] +devel_ci = [ + "apache-airflow[devel_all]", +] +devel_hadoop = [ + "apache-airflow[devel]", + "apache-airflow[editable_apache_hdfs]", + "apache-airflow[editable_apache_hive]", + "apache-airflow[editable_apache_impala]", + "apache-airflow[editable_hdfs]", + "apache-airflow[editable_presto]", + "apache-airflow[kerberos]", +] +# END OF bundle extras +############################################################################################################# +# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series +############################################################################################################# +# START OF deprecated extras +atlas = [ + "apache-airflow[apache_atlas]", +] +aws = [ + "apache-airflow[amazon]", +] +azure = [ + "apache-airflow[microsoft_azure]", +] +cassandra = [ + "apache-airflow[apache_cassandra]", +] +# Empty alias extra just for backward compatibility with Airflow 1.10 +crypto = [ +] +druid = [ + "apache-airflow[apache_druid]", +] +gcp = [ + "apache-airflow[google]", +] +gcp_api = [ + "apache-airflow[google]", +] +hdfs = [ + "apache-airflow[apache_hdfs]", +] +hive = [ +
"apache-airflow[apache_hive]", +] +kubernetes = [ + "apache-airflow[cncf_kubernetes]", +] +mssql = [ + "apache-airflow[microsoft_mssql]", +] +pinot = [ + "apache-airflow[apache_pinot]", +] +s3 = [ + "apache-airflow[amazon]", +] +spark = [ + "apache-airflow[apache_spark]", +] +webhdfs = [ + "apache-airflow[apache_webhdfs]", +] +winrm = [ + "apache-airflow[microsoft_winrm]", +] +# END OF deprecated extras +############################################################################################################# +# The whole section below is automatically generated by `update-providers-dependencies` pre-commit based +# on `provider.yaml` files present in the `providers` subdirectories. The `provider.yaml` files are +# A single source of truth for provider dependencies, +# +# PLEASE DO NOT MODIFY THIS SECTION MANUALLY. IT WILL BE OVERWRITTEN BY PRE-COMMIT !! +# If you want to modify these - modify the corresponding provider.yaml instead. +############################################################################################################# +# START OF GENERATED DEPENDENCIES +airbyte = [ + "apache-airflow-providers-airbyte", +] +alibaba = [ + "apache-airflow-providers-alibaba", +] +amazon = [ + "apache-airflow-providers-amazon", +] +apache_beam = [ + "apache-airflow-providers-apache-beam", +] +apache_cassandra = [ + "apache-airflow-providers-apache-cassandra", +] +apache_drill = [ + "apache-airflow-providers-apache-drill", +] +apache_druid = [ + "apache-airflow-providers-apache-druid", +] +apache_flink = [ + "apache-airflow-providers-apache-flink", +] +apache_hdfs = [ + "apache-airflow-providers-apache-hdfs", +] +apache_hive = [ + "apache-airflow-providers-apache-hive", +] +apache_impala = [ + "apache-airflow-providers-apache-impala", +] +apache_kafka = [ + "apache-airflow-providers-apache-kafka", +] +apache_kylin = [ + "apache-airflow-providers-apache-kylin", +] +apache_livy = [ + "apache-airflow-providers-apache-livy", +] +apache_pig = [ + "apache-airflow-providers-apache-pig", +] +apache_pinot = [ + "apache-airflow-providers-apache-pinot", +] +apache_spark = [ + "apache-airflow-providers-apache-spark", +] +apprise = [ + "apache-airflow-providers-apprise", +] +arangodb = [ + "apache-airflow-providers-arangodb", +] +asana = [ + "apache-airflow-providers-asana", +] +atlassian_jira = [ + "apache-airflow-providers-atlassian-jira", +] +celery = [ + "apache-airflow-providers-celery", +] +cloudant = [ + "apache-airflow-providers-cloudant", +] +cncf_kubernetes = [ + "apache-airflow-providers-cncf-kubernetes", +] +cohere = [ + "apache-airflow-providers-cohere", +] +common_io = [ + "apache-airflow-providers-common-io", +] +common_sql = [ + "apache-airflow-providers-common-sql", +] +databricks = [ + "apache-airflow-providers-databricks", +] +datadog = [ + "apache-airflow-providers-datadog", +] +dbt_cloud = [ + "apache-airflow-providers-dbt-cloud", +] +dingding = [ + "apache-airflow-providers-dingding", +] +discord = [ + "apache-airflow-providers-discord", +] +docker = [ + "apache-airflow-providers-docker", +] +elasticsearch = [ + "apache-airflow-providers-elasticsearch", +] +exasol = [ + "apache-airflow-providers-exasol", +] +fab = [ + "apache-airflow-providers-fab", +] +facebook = [ + "apache-airflow-providers-facebook", +] +ftp = [ + "apache-airflow-providers-ftp", +] +github = [ + "apache-airflow-providers-github", +] +google = [ + "apache-airflow-providers-google", +] +grpc = [ + "apache-airflow-providers-grpc", +] +hashicorp = [ + "apache-airflow-providers-hashicorp", +] +http = [ + 
"apache-airflow-providers-http", +] +imap = [ + "apache-airflow-providers-imap", +] +influxdb = [ + "apache-airflow-providers-influxdb", +] +jdbc = [ + "apache-airflow-providers-jdbc", +] +jenkins = [ + "apache-airflow-providers-jenkins", +] +microsoft_azure = [ + "apache-airflow-providers-microsoft-azure", +] +microsoft_mssql = [ + "apache-airflow-providers-microsoft-mssql", +] +microsoft_psrp = [ + "apache-airflow-providers-microsoft-psrp", +] +microsoft_winrm = [ + "apache-airflow-providers-microsoft-winrm", +] +mongo = [ + "apache-airflow-providers-mongo", +] +mysql = [ + "apache-airflow-providers-mysql", +] +neo4j = [ + "apache-airflow-providers-neo4j", +] +odbc = [ + "apache-airflow-providers-odbc", +] +openai = [ + "apache-airflow-providers-openai", +] +openfaas = [ + "apache-airflow-providers-openfaas", +] +openlineage = [ + "apache-airflow-providers-openlineage", +] +opensearch = [ + "apache-airflow-providers-opensearch", +] +opsgenie = [ + "apache-airflow-providers-opsgenie", +] +oracle = [ + "apache-airflow-providers-oracle", +] +pagerduty = [ + "apache-airflow-providers-pagerduty", +] +papermill = [ + "apache-airflow-providers-papermill", +] +pgvector = [ + "apache-airflow-providers-pgvector", +] +pinecone = [ + "apache-airflow-providers-pinecone", +] +postgres = [ + "apache-airflow-providers-postgres", +] +presto = [ + "apache-airflow-providers-presto", +] +redis = [ + "apache-airflow-providers-redis", +] +salesforce = [ + "apache-airflow-providers-salesforce", +] +samba = [ + "apache-airflow-providers-samba", +] +segment = [ + "apache-airflow-providers-segment", +] +sendgrid = [ + "apache-airflow-providers-sendgrid", +] +sftp = [ + "apache-airflow-providers-sftp", +] +singularity = [ + "apache-airflow-providers-singularity", +] +slack = [ + "apache-airflow-providers-slack", +] +smtp = [ + "apache-airflow-providers-smtp", +] +snowflake = [ + "apache-airflow-providers-snowflake", +] +sqlite = [ + "apache-airflow-providers-sqlite", +] +ssh = [ + "apache-airflow-providers-ssh", +] +tableau = [ + "apache-airflow-providers-tableau", +] +tabular = [ + "apache-airflow-providers-tabular", +] +telegram = [ + "apache-airflow-providers-telegram", +] +trino = [ + "apache-airflow-providers-trino", +] +vertica = [ + "apache-airflow-providers-vertica", +] +weaviate = [ + "apache-airflow-providers-weaviate", +] +yandex = [ + "apache-airflow-providers-yandex", +] +zendesk = [ + "apache-airflow-providers-zendesk", +] +all = [ + # core extras + "apache-airflow[aiobotocore]", + "apache-airflow[async]", + "apache-airflow[cgroups]", + "apache-airflow[deprecated_api]", + "apache-airflow[github_enterprise]", + "apache-airflow[google_auth]", + "apache-airflow[graphviz]", + "apache-airflow[kerberos]", + "apache-airflow[ldap]", + "apache-airflow[leveldb]", + "apache-airflow[otel]", + "apache-airflow[pandas]", + "apache-airflow[password]", + "apache-airflow[rabbitmq]", + "apache-airflow[s3fs]", + "apache-airflow[saml]", + "apache-airflow[sentry]", + "apache-airflow[statsd]", + "apache-airflow[virtualenv]", + # Apache no provider extras + "apache-airflow[apache_atlas]", + "apache-airflow[apache_webhdfs]", + # Provider extras + "apache-airflow[airbyte]", + "apache-airflow[alibaba]", + "apache-airflow[amazon]", + "apache-airflow[apache_beam]", + "apache-airflow[apache_cassandra]", + "apache-airflow[apache_drill]", + "apache-airflow[apache_druid]", + "apache-airflow[apache_flink]", + "apache-airflow[apache_hdfs]", + "apache-airflow[apache_hive]", + "apache-airflow[apache_impala]", + 
"apache-airflow[apache_kafka]", + "apache-airflow[apache_kylin]", + "apache-airflow[apache_livy]", + "apache-airflow[apache_pig]", + "apache-airflow[apache_pinot]", + "apache-airflow[apache_spark]", + "apache-airflow[apprise]", + "apache-airflow[arangodb]", + "apache-airflow[asana]", + "apache-airflow[atlassian_jira]", + "apache-airflow[celery]", + "apache-airflow[cloudant]", + "apache-airflow[cncf_kubernetes]", + "apache-airflow[cohere]", + "apache-airflow[common_io]", + "apache-airflow[common_sql]", + "apache-airflow[databricks]", + "apache-airflow[datadog]", + "apache-airflow[dbt_cloud]", + "apache-airflow[dingding]", + "apache-airflow[discord]", + "apache-airflow[docker]", + "apache-airflow[elasticsearch]", + "apache-airflow[exasol]", + "apache-airflow[fab]", + "apache-airflow[facebook]", + "apache-airflow[ftp]", + "apache-airflow[github]", + "apache-airflow[google]", + "apache-airflow[grpc]", + "apache-airflow[hashicorp]", + "apache-airflow[http]", + "apache-airflow[imap]", + "apache-airflow[influxdb]", + "apache-airflow[jdbc]", + "apache-airflow[jenkins]", + "apache-airflow[microsoft_azure]", + "apache-airflow[microsoft_mssql]", + "apache-airflow[microsoft_psrp]", + "apache-airflow[microsoft_winrm]", + "apache-airflow[mongo]", + "apache-airflow[mysql]", + "apache-airflow[neo4j]", + "apache-airflow[odbc]", + "apache-airflow[openai]", + "apache-airflow[openfaas]", + "apache-airflow[openlineage]", + "apache-airflow[opensearch]", + "apache-airflow[opsgenie]", + "apache-airflow[oracle]", + "apache-airflow[pagerduty]", + "apache-airflow[papermill]", + "apache-airflow[pgvector]", + "apache-airflow[pinecone]", + "apache-airflow[postgres]", + "apache-airflow[presto]", + "apache-airflow[redis]", + "apache-airflow[salesforce]", + "apache-airflow[samba]", + "apache-airflow[segment]", + "apache-airflow[sendgrid]", + "apache-airflow[sftp]", + "apache-airflow[singularity]", + "apache-airflow[slack]", + "apache-airflow[smtp]", + "apache-airflow[snowflake]", + "apache-airflow[sqlite]", + "apache-airflow[ssh]", + "apache-airflow[tableau]", + "apache-airflow[tabular]", + "apache-airflow[telegram]", + "apache-airflow[trino]", + "apache-airflow[vertica]", + "apache-airflow[weaviate]", + "apache-airflow[yandex]", + "apache-airflow[zendesk]", +] +editable_airbyte = [ + "apache-airflow[editable_http]", +] +editable_alibaba = [ + "alibabacloud_adb20211201>=1.0.0", + "alibabacloud_tea_openapi>=0.3.7", + "oss2>=2.14.0", +] +editable_amazon = [ + "apache-airflow[editable_common_sql]", + "apache-airflow[editable_http]", + "asgiref", + "boto3>=1.28.0", + "botocore>=1.31.0", + "inflection>=0.5.1", + "jsonpath_ng>=1.5.3", + "redshift_connector>=2.0.888", + "sqlalchemy_redshift>=0.8.6", + "watchtower>=2.0.1,<4", +] +editable_apache_beam = [ + "apache-beam>=2.47.0", +] +editable_apache_cassandra = [ + "cassandra-driver>=3.13.0", +] +editable_apache_drill = [ + "apache-airflow[editable_common_sql]", + "sqlalchemy-drill>=1.1.0", +] +editable_apache_druid = [ + "apache-airflow[editable_common_sql]", + "pydruid>=0.4.1", +] +editable_apache_flink = [ + "apache-airflow[editable_cncf_kubernetes]", + "cryptography>=2.0.0", +] +editable_apache_hdfs = [ + "hdfs[avro,dataframe,kerberos]>=2.0.4", +] +editable_apache_hive = [ + "apache-airflow[editable_common_sql]", + "hmsclient>=0.1.0", + "pandas>=0.17.1", + "pyhive[hive_pure_sasl]>=0.7.0", + "thrift>=0.9.2", +] +editable_apache_impala = [ + "impyla>=0.18.0,<1.0", +] +editable_apache_kafka = [ + "asgiref", + "confluent-kafka>=1.8.2", +] +editable_apache_kylin = [ + 
"kylinpy>=2.6", +] +editable_apache_livy = [ + "aiohttp", + "apache-airflow[editable_http]", + "asgiref", +] +editable_apache_pig = [ +] +editable_apache_pinot = [ + "apache-airflow[editable_common_sql]", + "pinotdb>0.4.7", +] +editable_apache_spark = [ + "grpcio-status", + "pyspark", +] +editable_apprise = [ + "apprise", +] +editable_arangodb = [ + "python-arango>=7.3.2", +] +editable_asana = [ + "asana>=0.10,<4.0.0", +] +editable_atlassian_jira = [ + "atlassian-python-api>=1.14.2", +] +editable_celery = [ + "celery>=5.3.0,<6,!=5.3.3,!=5.3.2", + "flower>=1.0.0", + "google-re2>=1.0", +] +editable_cloudant = [ + "cloudant>=2.0", +] +editable_cncf_kubernetes = [ + "aiofiles>=23.2.0", + "asgiref>=3.5.2", + "cryptography>=2.0.0", + "google-re2>=1.0", + "kubernetes>=21.7.0,<24", + "kubernetes_asyncio>=18.20.1,<25", +] +editable_cohere = [ + "cohere>=4.27", +] +editable_common_io = [ +] +editable_common_sql = [ + "sqlparse>=0.4.2", +] +editable_databricks = [ + "aiohttp>=3.6.3, <4", + "apache-airflow[editable_common_sql]", + "databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0", + "requests>=2.27,<3", +] +editable_datadog = [ + "datadog>=0.14.0", +] +editable_dbt_cloud = [ + "aiohttp", + "apache-airflow[editable_http]", + "asgiref", +] +editable_dingding = [ + "apache-airflow[editable_http]", +] +editable_discord = [ + "apache-airflow[editable_http]", +] +editable_docker = [ + "docker>=5.0.3", + "python-dotenv>=0.21.0", +] +editable_elasticsearch = [ + "apache-airflow[editable_common_sql]", + "elasticsearch>=8.10,<9", +] +editable_exasol = [ + "apache-airflow[editable_common_sql]", + "pandas>=0.17.1", + "pyexasol>=0.5.1", +] +editable_fab = [ + "flask-appbuilder==4.3.10", + "flask-login>=0.6.2", + "flask>=2.2,<2.3", + "google-re2>=1.0", +] +editable_facebook = [ + "facebook-business>=6.0.2", +] +editable_ftp = [ +] +editable_github = [ + "PyGithub!=1.58", +] +editable_google = [ + "PyOpenSSL", + "apache-airflow[editable_common_sql]", + "asgiref>=3.5.2", + "gcloud-aio-auth>=4.0.0,<5.0.0", + "gcloud-aio-bigquery>=6.1.2", + "gcloud-aio-storage>=9.0.0", + "gcsfs>=2023.10.0", + "google-ads>=22.1.0", + "google-api-core>=2.11.0", + "google-api-python-client>=1.6.0", + "google-auth-httplib2>=0.0.1", + "google-auth>=1.0.0", + "google-cloud-aiplatform>=1.22.1", + "google-cloud-automl>=2.12.0", + "google-cloud-batch>=0.13.0", + "google-cloud-bigquery-datatransfer>=3.13.0", + "google-cloud-bigtable>=2.17.0", + "google-cloud-build>=3.22.0", + "google-cloud-compute>=1.10.0", + "google-cloud-container>=2.17.4", + "google-cloud-datacatalog>=3.11.1", + "google-cloud-dataflow-client>=0.8.6", + "google-cloud-dataform>=0.5.0", + "google-cloud-dataplex>=1.10.0", + "google-cloud-dataproc-metastore>=1.12.0", + "google-cloud-dataproc>=5.8.0", + "google-cloud-dlp>=3.12.0", + "google-cloud-kms>=2.15.0", + "google-cloud-language>=2.9.0", + "google-cloud-logging>=3.5.0", + "google-cloud-memcache>=1.7.0", + "google-cloud-monitoring>=2.18.0", + "google-cloud-orchestration-airflow>=1.10.0", + "google-cloud-os-login>=2.9.1", + "google-cloud-pubsub>=2.19.0", + "google-cloud-redis>=2.12.0", + "google-cloud-run>=0.9.0", + "google-cloud-secret-manager>=2.16.0", + "google-cloud-spanner>=3.11.1", + "google-cloud-speech>=2.18.0", + "google-cloud-storage-transfer>=1.4.1", + "google-cloud-storage>=2.7.0", + "google-cloud-tasks>=2.13.0", + "google-cloud-texttospeech>=2.14.1", + "google-cloud-translate>=3.11.0", + "google-cloud-videointelligence>=2.11.0", + "google-cloud-vision>=3.4.0", + "google-cloud-workflows>=1.10.0", + 
"grpcio-gcp>=0.2.2", + "httpx", + "json-merge-patch>=0.2", + "looker-sdk>=22.2.0", + "pandas-gbq", + "pandas>=0.17.1", + "proto-plus>=1.19.6", + "sqlalchemy-bigquery>=1.2.1", + "sqlalchemy-spanner>=1.6.2", +] +editable_grpc = [ + "google-auth-httplib2>=0.0.1", + "google-auth>=1.0.0, <3.0.0", + "grpcio>=1.15.0", +] +editable_hashicorp = [ + "hvac>=0.10", +] +editable_http = [ + "aiohttp", + "asgiref", + "requests>=2.26.0", + "requests_toolbelt", +] +editable_imap = [ +] +editable_influxdb = [ + "influxdb-client>=1.19.0", + "requests>=2.26.0", +] +editable_jdbc = [ + "apache-airflow[editable_common_sql]", + "jaydebeapi>=1.1.1", +] +editable_jenkins = [ + "python-jenkins>=1.0.0", +] +editable_microsoft_azure = [ + "adal>=1.2.7", + "adlfs>=2023.10.0", + "azure-batch>=8.0.0", + "azure-cosmos>=4.0.0", + "azure-datalake-store>=0.0.45", + "azure-identity>=1.3.1", + "azure-keyvault-secrets>=4.1.0", + "azure-kusto-data>=4.1.0", + "azure-mgmt-containerinstance>=9.0.0", + "azure-mgmt-containerregistry>=8.0.0", + "azure-mgmt-cosmosdb", + "azure-mgmt-datafactory>=2.0.0", + "azure-mgmt-datalake-store>=0.5.0", + "azure-mgmt-resource>=2.2.0", + "azure-mgmt-storage>=16.0.0", + "azure-servicebus>=7.6.1", + "azure-storage-blob>=12.14.0", + "azure-storage-file-datalake>=12.9.1", + "azure-storage-file-share", + "azure-synapse-artifacts>=0.17.0", + "azure-synapse-spark", +] +editable_microsoft_mssql = [ + "apache-airflow[editable_common_sql]", + "pymssql>=2.1.8", +] +editable_microsoft_psrp = [ + "pypsrp>=0.8.0", +] +editable_microsoft_winrm = [ + "pywinrm>=0.4", +] +editable_mongo = [ + "dnspython>=1.13.0", + "pymongo>=3.6.0", +] +editable_mysql = [ + "apache-airflow[editable_common_sql]", + "mysql-connector-python>=8.0.11", + "mysqlclient>=1.3.6", +] +editable_neo4j = [ + "neo4j>=4.2.1", +] +editable_odbc = [ + "apache-airflow[editable_common_sql]", + "pyodbc", +] +editable_openai = [ + "openai[datalib]>=1.0", +] +editable_openfaas = [ +] +editable_openlineage = [ + "apache-airflow[editable_common_sql]", + "attrs>=22.2", + "openlineage-integration-common>=0.28.0", + "openlineage-python>=0.28.0", +] +editable_opensearch = [ + "opensearch-py>=2.2.0", +] +editable_opsgenie = [ + "opsgenie-sdk>=2.1.5", +] +editable_oracle = [ + "apache-airflow[editable_common_sql]", + "oracledb>=1.0.0", +] +editable_pagerduty = [ + "pdpyras>=4.1.2", +] +editable_papermill = [ + "ipykernel", + "papermill[all]>=1.2.1", + "scrapbook[all]", +] +editable_pgvector = [ + "apache-airflow[editable_postgres]", + "pgvector>=0.2.3", +] +editable_pinecone = [ + "pinecone-client>=2.2.4", +] +editable_postgres = [ + "apache-airflow[editable_common_sql]", + "psycopg2-binary>=2.8.0", +] +editable_presto = [ + "apache-airflow[editable_common_sql]", + "pandas>=0.17.1", + "presto-python-client>=0.8.4", +] +editable_redis = [ + "redis>=4.5.2,<5.0.0,!=4.5.5", +] +editable_salesforce = [ + "pandas>=0.17.1", + "simple-salesforce>=1.0.0", +] +editable_samba = [ + "smbprotocol>=1.5.0", +] +editable_segment = [ + "analytics-python>=1.2.9", +] +editable_sendgrid = [ + "sendgrid>=6.0.0", +] +editable_sftp = [ + "apache-airflow[editable_ssh]", + "paramiko>=2.8.0", +] +editable_singularity = [ + "spython>=0.0.56", +] +editable_slack = [ + "apache-airflow[editable_common_sql]", + "slack_sdk>=3.0.0", +] +editable_smtp = [ +] +editable_snowflake = [ + "apache-airflow[editable_common_sql]", + "snowflake-connector-python>=2.7.8", + "snowflake-sqlalchemy>=1.1.0", +] +editable_sqlite = [ + "apache-airflow[editable_common_sql]", +] +editable_ssh = [ + 
"paramiko>=2.6.0", + "sshtunnel>=0.3.2", +] +editable_tableau = [ + "tableauserverclient", +] +editable_tabular = [ +] +editable_telegram = [ + "python-telegram-bot>=20.0.0", +] +editable_trino = [ + "apache-airflow[editable_common_sql]", + "pandas>=0.17.1", + "trino>=0.318.0", +] +editable_vertica = [ + "apache-airflow[editable_common_sql]", + "vertica-python>=0.5.1", +] +editable_weaviate = [ + "pandas>=0.17.1", + "weaviate-client>=3.24.2", +] +editable_yandex = [ + "yandexcloud>=0.228.0", +] +editable_zendesk = [ + "zenpy>=2.0.24", +] +devel_all = [ + "apache-airflow[devel]", + "apache-airflow[doc]", + "apache-airflow[doc_gen]", + "apache-airflow[saml]", + # Apache no provider extras + "apache-airflow[apache_atlas]", + "apache-airflow[apache_webhdfs]", + # Include all manually added devel extras + # Include all provider deps + "apache-airflow[devel_amazon]", + "apache-airflow[devel_azure]", + "apache-airflow[devel_breeze]", + "apache-airflow[devel_debuggers]", + "apache-airflow[devel_deltalake]", + "apache-airflow[devel_devscripts]", + "apache-airflow[devel_duckdb]", + "apache-airflow[devel_iceberg]", + "apache-airflow[devel_mongo]", + "apache-airflow[devel_mypy]", + "apache-airflow[devel_sentry]", + "apache-airflow[devel_static_checks]", + "apache-airflow[devel_tests]", + "apache-airflow[devel_all_dbs]", + "apache-airflow[editable_airbyte]", + "apache-airflow[editable_alibaba]", + "apache-airflow[editable_amazon]", + "apache-airflow[editable_apache_beam]", + "apache-airflow[editable_apache_cassandra]", + "apache-airflow[editable_apache_drill]", + "apache-airflow[editable_apache_druid]", + "apache-airflow[editable_apache_flink]", + "apache-airflow[editable_apache_hdfs]", + "apache-airflow[editable_apache_hive]", + "apache-airflow[editable_apache_impala]", + "apache-airflow[editable_apache_kafka]", + "apache-airflow[editable_apache_kylin]", + "apache-airflow[editable_apache_livy]", + "apache-airflow[editable_apache_pig]", + "apache-airflow[editable_apache_pinot]", + "apache-airflow[editable_apache_spark]", + "apache-airflow[editable_apprise]", + "apache-airflow[editable_arangodb]", + "apache-airflow[editable_asana]", + "apache-airflow[editable_atlassian_jira]", + "apache-airflow[editable_celery]", + "apache-airflow[editable_cloudant]", + "apache-airflow[editable_cncf_kubernetes]", + "apache-airflow[editable_cohere]", + "apache-airflow[editable_common_io]", + "apache-airflow[editable_common_sql]", + "apache-airflow[editable_databricks]", + "apache-airflow[editable_datadog]", + "apache-airflow[editable_dbt_cloud]", + "apache-airflow[editable_dingding]", + "apache-airflow[editable_discord]", + "apache-airflow[editable_docker]", + "apache-airflow[editable_elasticsearch]", + "apache-airflow[editable_exasol]", + "apache-airflow[editable_fab]", + "apache-airflow[editable_facebook]", + "apache-airflow[editable_ftp]", + "apache-airflow[editable_github]", + "apache-airflow[editable_google]", + "apache-airflow[editable_grpc]", + "apache-airflow[editable_hashicorp]", + "apache-airflow[editable_http]", + "apache-airflow[editable_imap]", + "apache-airflow[editable_influxdb]", + "apache-airflow[editable_jdbc]", + "apache-airflow[editable_jenkins]", + "apache-airflow[editable_microsoft_azure]", + "apache-airflow[editable_microsoft_mssql]", + "apache-airflow[editable_microsoft_psrp]", + "apache-airflow[editable_microsoft_winrm]", + "apache-airflow[editable_mongo]", + "apache-airflow[editable_mysql]", + "apache-airflow[editable_neo4j]", + "apache-airflow[editable_odbc]", + 
"apache-airflow[editable_openai]", + "apache-airflow[editable_openfaas]", + "apache-airflow[editable_openlineage]", + "apache-airflow[editable_opensearch]", + "apache-airflow[editable_opsgenie]", + "apache-airflow[editable_oracle]", + "apache-airflow[editable_pagerduty]", + "apache-airflow[editable_papermill]", + "apache-airflow[editable_pgvector]", + "apache-airflow[editable_pinecone]", + "apache-airflow[editable_postgres]", + "apache-airflow[editable_presto]", + "apache-airflow[editable_redis]", + "apache-airflow[editable_salesforce]", + "apache-airflow[editable_samba]", + "apache-airflow[editable_segment]", + "apache-airflow[editable_sendgrid]", + "apache-airflow[editable_sftp]", + "apache-airflow[editable_singularity]", + "apache-airflow[editable_slack]", + "apache-airflow[editable_smtp]", + "apache-airflow[editable_snowflake]", + "apache-airflow[editable_sqlite]", + "apache-airflow[editable_ssh]", + "apache-airflow[editable_tableau]", + "apache-airflow[editable_tabular]", + "apache-airflow[editable_telegram]", + "apache-airflow[editable_trino]", + "apache-airflow[editable_vertica]", + "apache-airflow[editable_weaviate]", + "apache-airflow[editable_yandex]", + "apache-airflow[editable_zendesk]", +] +# END OF GENERATED DEPENDENCIES +############################################################################################################# +# The rest of the pyproject.toml file should be manually maintained +############################################################################################################# +[project.scripts] +airflow = "airflow.__main__:main" +[project.urls] +"Bug Tracker" = "https://github.com/apache/airflow/issues" +Documentation = "https://airflow.apache.org/docs/" +Downloads = "https://archive.apache.org/dist/airflow/" +Homepage = "https://airflow.apache.org/" +"Release Notes" = "https://airflow.apache.org/docs/apache-airflow/stable/release_notes.html" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Source Code" = "https://github.com/apache/airflow" +Twitter = "https://twitter.com/ApacheAirflow" +YouTube = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[tool.hatch.envs.default] +python = "3.8" +platforms = ["linux", "macos"] +description = "Default environment with Python 3.8 for maximum compatibility" +extra-dependencies = [ + "apache-airflow[devel]", +] + +[tool.hatch.envs.airflow-38] +python = "3.8" +platforms = ["linux", "macos"] +description = "Environment with Python 3.8" +extra-dependencies = [ + "apache-airflow[devel]", +] + +[tool.hatch.envs.airflow-39] +python = "3.9" +platforms = ["linux", "macos"] +description = "Environment with Python 3.9" +extra-dependencies = [ + "apache-airflow[devel]", +] + +[tool.hatch.envs.airflow-310] +python = "3.10" +platforms = ["linux", "macos"] +description = "Environment with Python 3.10" +extra-dependencies = [ + "apache-airflow[devel]", +] + +[tool.hatch.envs.airflow-311] +python = "3.11" +platforms = ["linux", "macos"] +description = "Environment with Python 3.11" +extra-dependencies = [ + "apache-airflow[devel]", +] + +[tool.hatch.version] +path = "airflow/__init__.py" + +[tool.hatch.build.targets.wheel.hooks.custom] + +[tool.hatch.build.targets.sdist] +include = [ + "/airflow", +] +exclude = [ + "/airflow/providers/", +] + +[tool.hatch.build.targets.wheel] +include = [ + "/airflow", +] +exclude = [ + "/airflow/providers/", +] +artifacts = [ + "/airflow/www/static/dist/" +] + [tool.black] line-length = 110 target-version = ['py38', 'py39', 'py310', 'py311'] -# Editable installs are 
currently broken using setuptools 64.0.0 and above. The problem is tracked in -# https://github.com/pypa/setuptools/issues/3548. We're also discussing how we could potentially fix -# this problem on our end in issue https://github.com/apache/airflow/issues/30764. Until then we need -# to use one of the following workarounds locally for editable installs: -# 1) Pin setuptools <= 63.4.3 below in the [build-system] section. -# 2) Include your airflow source code directory in PYTHONPATH. -[build-system] -requires = ['setuptools==67.2.0'] -build-backend = "setuptools.build_meta" - [tool.ruff] target-version = "py38" typing-modules = ["airflow.typing_compat"] @@ -47,7 +1499,6 @@ extend-select = [ "I", # Missing required import (auto-fixable) "UP", # Pyupgrade "RUF100", # Unused noqa (auto-fixable) - # We ignore more pydocstyle than we enable, so be more selective at what we enable "D101", "D106", @@ -188,3 +1639,35 @@ exclude_also = [ "@(typing(_extensions)?\\.)?overload", "if (typing(_extensions)?\\.)?TYPE_CHECKING:" ] + +[tool.mypy] +ignore_missing_imports = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = false +plugins = [ + "dev.mypy.plugin.decorators", + "dev.mypy.plugin.outputs", +] +pretty = true +show_error_codes = true +disable_error_code = [ + "annotation-unchecked", +] + +[tool.mypy."airflow.config_templates.default_webserver_config"] +disable_error_code = [ + "var-annotated", +] + +[tool.mypy."airflow.migrations.*"] +ignore_errors = true + +[tool.mypy."google.cloud.*"] +no_implicit_optional = false + +[tool.mypy."azure.*"] +no_implicit_optional = false + +[tool.mypy."referencing.*"] +ignore_errors = true diff --git a/scripts/ci/docker-compose/devcontainer.env b/scripts/ci/docker-compose/devcontainer.env index 465e7ab13f6d2..4ef3fd045972d 100644 --- a/scripts/ci/docker-compose/devcontainer.env +++ b/scripts/ci/docker-compose/devcontainer.env @@ -46,7 +46,6 @@ HOST_OS="linux" INIT_SCRIPT_FILE="init.sh" INSTALL_AIRFLOW_VERSION= AIRFLOW_CONSTRAINTS_MODE= -INSTALL_PROVIDERS_FROM_SOURCES= INSTALL_SELECTED_PROVIDERS= USE_AIRFLOW_VERSION= USE_PACKAGES_FROM_DIST= diff --git a/scripts/ci/docker-compose/local.yml b/scripts/ci/docker-compose/local.yml index 1ee86fc7739ec..9258b3b504607 100644 --- a/scripts/ci/docker-compose/local.yml +++ b/scripts/ci/docker-compose/local.yml @@ -55,9 +55,6 @@ services: - type: bind source: ../../../LICENSE target: /opt/airflow/LICENSE - - type: bind - source: ../../../MANIFEST.in - target: /opt/airflow/MANIFEST.in - type: bind source: ../../../NOTICE target: /opt/airflow/NOTICE @@ -100,12 +97,6 @@ services: - type: bind source: ../../../scripts/docker/entrypoint_ci.sh target: /entrypoint - - type: bind - source: ../../../setup.cfg - target: /opt/airflow/setup.cfg - - type: bind - source: ../../../setup.py - target: /opt/airflow/setup.py - type: bind source: ../../../tests target: /opt/airflow/tests diff --git a/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py new file mode 100755 index 0000000000000..2a594817d19db --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_extra_packages_ref.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Checks that all extras defined in pyproject.toml are listed in the extra-packages-ref.rst file +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +from tabulate import tabulate + +# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files +try: + import tomllib +except ImportError: + import tomli as tomllib + + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +EXTRA_PACKAGES_REF_FILE = AIRFLOW_ROOT_PATH / "docs" / "apache-airflow" / "extra-packages-ref.rst" +PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported + +from common_precommit_utils import console + +pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) + +optional_dependencies: dict[str, list[str]] = pyproject_toml_content["project"]["optional-dependencies"] +doc_ref_content = EXTRA_PACKAGES_REF_FILE.read_text() + +errors: list[str] = [] +regular_suggestions: list[str] = [] +devel_suggestions: list[str] = [] +suggestions: list[tuple] = [] +suggestions_devel: list[tuple] = [] +suggestions_providers: list[tuple] = [] + +for dependency in optional_dependencies: + console.print(f"[bright_blue]Checking if {dependency} is mentioned in refs[/]") + find_matching = re.search(rf"^\| {dependency} *\|", doc_ref_content, flags=re.MULTILINE) + if not find_matching: + errors.append(f"[red]ERROR: {dependency} is not listed in {EXTRA_PACKAGES_REF_FILE}[/]") + is_devel_dep = dependency.startswith(("devel", "editable")) or dependency in ["doc", "doc_gen"] + is_provider_dep = dependency.startswith("editable_") + short_dep = dependency.replace("editable_", "").replace("devel_", "") + if not is_devel_dep: + suggestions.append( + ( + dependency, + f"pip install apache-airflow[{dependency}]", + f"{dependency.capitalize()} hooks and operators", + ) + ) + else: + if is_provider_dep: + suggestions_providers.append( + ( + dependency, + f"pip install -e '.[{dependency}]'", + f"Adds all libraries needed by the {short_dep} provider", + ) + ) + else: + suggestions_devel.append( + ( + dependency, + f"pip install -e '.[{dependency}]'", + f"Adds all test libraries needed to test {short_dep}", + ) + ) + +HEADERS = ["extra", "install command", "enables"] +if errors: + console.print("\n".join(errors)) + console.print() + console.print("[bright_blue]Suggested tables to add to references:[/]") + if suggestions: + console.print("[bright_blue]Regular dependencies[/]") + console.print(tabulate(suggestions, headers=HEADERS, tablefmt="grid"), markup=False) + if suggestions_devel: + console.print("[bright_blue]Devel dependencies[/]") + console.print(tabulate(suggestions_devel, headers=HEADERS, tablefmt="grid"), markup=False) + if suggestions_providers: + console.print("[bright_blue]Provider dependencies[/]") + console.print(tabulate(suggestions_providers, headers=HEADERS, 
tablefmt="grid"), markup=False) + sys.exit(1) +else: + console.print(f"[green]Checked: {len(optional_dependencies)} dependencies are mentioned[/]") diff --git a/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py b/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py index 31c8c88eae061..f386edfc0da05 100755 --- a/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py +++ b/scripts/ci/pre_commit/pre_commit_check_order_dockerfile_extras.py @@ -17,7 +17,8 @@ # specific language governing permissions and limitations # under the License. """ -Test for an order of dependencies in setup.py +Test for an order of dependencies in pyproject.toml +TODO(potiuk) fix this test to use pyproject.toml """ from __future__ import annotations diff --git a/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py new file mode 100755 index 0000000000000..0fbdf357f3aaf --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_order_pyproject_toml.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+""" +Test for an order of dependencies in setup.py +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +from rich import print + +errors: list[str] = [] + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +PYPROJECT_TOML_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" + +sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported +from common_precommit_utils import check_list_sorted + + +def check_extras(type: str, extra: str, extras: list[str]) -> None: + r""" + Test for an order of dependencies in extra defined + `^dependent_group_name = [.*?]\n` in setup.py + """ + print(f"[info]Checking {type}:{extra}[/]") + extras = [extra.replace("[", "\\[") for extra in extras] + check_list_sorted(extras, f"Order of extra: {type}:{extra}", errors) + + +def extract_deps(content: str, extra: str) -> list[str]: + deps: list[str] = [] + extracting = False + for line in content.splitlines(): + line = line.strip() + if line.startswith("#"): + continue + if not extracting and line == f"{extra} = [": + extracting = True + elif extracting and line == "]": + break + elif extracting: + deps.append(line.strip().strip(",").strip('"')) + return deps + + +def check_type(pyproject_toml_contents: str, type: str) -> None: + """ + Test for an order of dependencies groups between mark + '# Start dependencies group' and '# End dependencies group' in setup.py + """ + print(f"[info]Checking {type}[/]") + pattern_type = re.compile(f"# START OF {type}\n(.*)# END OF {type}", re.DOTALL) + parsed_type_content = pattern_type.findall(pyproject_toml_contents)[0] + # strip comments + parsed_type_content = ( + "\n".join([line for line in parsed_type_content.splitlines() if not line.startswith("#")]) + "\n" + ) + pattern_extra_name = re.compile(r" = \[.*?]\n", re.DOTALL) + type_content = pattern_extra_name.sub(",", parsed_type_content) + + list_extra_names = type_content.strip(",").split(",") + check_list_sorted(list_extra_names, "Order of dependencies", errors) + for extra in list_extra_names: + deps_list = extract_deps(parsed_type_content, extra) + check_extras(type, extra, deps_list) + + +if __name__ == "__main__": + file_contents = PYPROJECT_TOML_PATH.read_text() + check_type(file_contents, "core extras") + check_type(file_contents, "Apache no provider extras") + check_type(file_contents, "devel extras") + check_type(file_contents, "doc extras") + check_type(file_contents, "bundle extras") + check_type(file_contents, "deprecated extras") + + print() + for error in errors: + print(error) + + print() + + if errors: + sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_order_setup.py b/scripts/ci/pre_commit/pre_commit_check_order_setup.py deleted file mode 100755 index 95bc8e59ee256..0000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_order_setup.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Test for an order of dependencies in setup.py -""" -from __future__ import annotations - -import os -import re -import sys -from pathlib import Path - -from rich import print - -errors: list[str] = [] - -SOURCE_DIR_PATH = Path(__file__).parents[3].resolve() -sys.path.insert(0, os.fspath(SOURCE_DIR_PATH)) -sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -from common_precommit_utils import check_list_sorted - - -def check_main_dependent_group(setup_contents: str) -> None: - """ - Test for an order of dependencies groups between mark - '# Start dependencies group' and '# End dependencies group' in setup.py - """ - print("[info]Checking main dependency group[/]") - pattern_main_dependent_group = re.compile( - "# Start dependencies group\n(.*)# End dependencies group", re.DOTALL - ) - main_dependent_group = pattern_main_dependent_group.findall(setup_contents)[0] - - pattern_sub_dependent = re.compile(r" = \[.*?]\n", re.DOTALL) - main_dependent = pattern_sub_dependent.sub(",", main_dependent_group) - - src = main_dependent.strip(",").split(",") - check_list_sorted(src, "Order of dependencies", errors) - - for group in src: - check_sub_dependent_group(group) - - -def check_sub_dependent_group(group_name: str) -> None: - r""" - Test for an order of each dependencies groups declare like - `^dependent_group_name = [.*?]\n` in setup.py - """ - print(f"[info]Checking dependency group {group_name}[/]") - check_list_sorted(getattr(setup, group_name), f"Order of dependency group: {group_name}", errors) - - -def check_alias_dependent_group(setup_context: str) -> None: - """ - Test for an order of each dependencies groups declare like - `alias_dependent_group = dependent_group_1 + ... 
+ dependent_group_n` in setup.py - """ - pattern = re.compile("^\\w+ = (\\w+ \\+.*)", re.MULTILINE) - dependents = pattern.findall(setup_context) - - for dependent in dependents: - print(f"[info]Checking alias-dependent group {dependent}[/]") - src = dependent.split(" + ") - check_list_sorted(src, f"Order of alias dependencies group: {dependent}", errors) - - -def check_variable_order(var_name: str) -> None: - print(f"[info]Checking {var_name}[/]") - - var = getattr(setup, var_name) - - if isinstance(var, dict): - check_list_sorted(list(var.keys()), f"Order of dependencies in: {var_name}", errors) - else: - check_list_sorted(var, f"Order of dependencies in: {var_name}", errors) - - -def check_install_and_setup_requires() -> None: - """ - Test for an order of dependencies in function do_setup section - install_requires and setup_requires in setup.cfg - """ - - from setuptools.config import read_configuration - - path = os.fspath(SOURCE_DIR_PATH / "setup.cfg") - config = read_configuration(path) - - pattern_dependent_version = re.compile("[~|><=;].*") - - for key in ("install_requires", "setup_requires"): - print(f"[info]Checking setup.cfg group {key}[/]") - deps = config["options"][key] - dists = [pattern_dependent_version.sub("", p) for p in deps] - check_list_sorted(dists, f"Order of dependencies in do_setup section: {key}", errors) - - -if __name__ == "__main__": - import setup - - with open(setup.__file__) as setup_file: - file_contents = setup_file.read() - check_main_dependent_group(file_contents) - check_alias_dependent_group(file_contents) - check_variable_order("CORE_EXTRAS_DEPENDENCIES") - check_variable_order("ADDITIONAL_EXTRAS_DEPENDENCIES") - check_variable_order("EXTRAS_DEPRECATED_ALIASES") - check_variable_order("PREINSTALLED_PROVIDERS") - check_install_and_setup_requires() - - print() - print() - for error in errors: - print(error) - - print() - - if errors: - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py deleted file mode 100755 index 0997fc008de9e..0000000000000 --- a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-""" -Checks if all the libraries in setup.py are listed in installation.rst file -""" -from __future__ import annotations - -import os -import re -import sys -from pathlib import Path - -from rich import print -from rich.console import Console -from rich.table import Table - -AIRFLOW_SOURCES_DIR = Path(__file__).parents[3].resolve() -SETUP_PY_FILE = "setup.py" -DOCS_FILE = os.path.join("docs", "apache-airflow", "extra-packages-ref.rst") -PY_IDENTIFIER = r"[a-zA-Z_][a-zA-Z0-9_\.]*" - -sys.path.insert(0, os.fspath(AIRFLOW_SOURCES_DIR)) - -os.environ["_SKIP_PYTHON_VERSION_CHECK"] = "true" - -from setup import ( - EXTRAS_DEPENDENCIES, - EXTRAS_DEPRECATED_ALIASES, - EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS, - PREINSTALLED_PROVIDERS, - add_all_provider_packages, -) - - -def get_file_content(*path_elements: str) -> str: - file_path = AIRFLOW_SOURCES_DIR.joinpath(*path_elements) - return file_path.read_text() - - -def get_extras_from_setup() -> set[str]: - """Returns a set of regular (non-deprecated) extras from setup.""" - return ( - set(EXTRAS_DEPENDENCIES.keys()) - - set(EXTRAS_DEPRECATED_ALIASES.keys()) - - set(EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS) - ) - - -def get_extras_from_docs() -> set[str]: - """ - Returns a list of extras from airflow.docs. - """ - docs_content = get_file_content(DOCS_FILE) - extras_section_regex = re.compile( - rf"\|[^|]+\|.*pip install .apache-airflow\[({PY_IDENTIFIER})][^|]+\|[^|]+\|", - re.MULTILINE, - ) - doc_extra_set: set[str] = set() - for doc_extra in extras_section_regex.findall(docs_content): - doc_extra_set.add(doc_extra) - return doc_extra_set - - -def get_preinstalled_providers_from_docs() -> list[str]: - """ - Returns list of pre-installed providers from the doc. - """ - docs_content = get_file_content(DOCS_FILE) - preinstalled_section_regex = re.compile( - rf"\|\s*({PY_IDENTIFIER})\s*\|[^|]+pip install[^|]+\|[^|]+\|\s+\*\s+\|$", - re.MULTILINE, - ) - return preinstalled_section_regex.findall(docs_content) - - -def get_deprecated_extras_from_docs() -> dict[str, str]: - """ - Returns dict of deprecated extras from airflow.docs (alias -> target extra) - """ - deprecated_extras = {} - docs_content = get_file_content(DOCS_FILE) - - deprecated_extras_section_regex = re.compile( - r"\| Deprecated extra \| Extra to be used instead \|\n(.*)\n", re.DOTALL - ) - deprecated_extras_content = deprecated_extras_section_regex.findall(docs_content)[0] - - deprecated_extras_regexp = re.compile(r"\|\s(\S+)\s+\|\s(\S*)\s+\|$", re.MULTILINE) - for extras in deprecated_extras_regexp.findall(deprecated_extras_content): - deprecated_extras[extras[0]] = extras[1] - return deprecated_extras - - -def check_extras(console: Console) -> bool: - """ - Checks if non-deprecated extras match setup vs. doc. 
- :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - extras_table = Table() - extras_table.add_column("NAME", justify="right", style="cyan") - extras_table.add_column("SETUP", justify="center", style="magenta") - extras_table.add_column("DOCS", justify="center", style="yellow") - non_deprecated_setup_extras = get_extras_from_setup() - non_deprecated_docs_extras = get_extras_from_docs() - for extra in non_deprecated_setup_extras: - if extra not in non_deprecated_docs_extras: - extras_table.add_row(extra, "V", "") - for extra in non_deprecated_docs_extras: - if extra not in non_deprecated_setup_extras: - extras_table.add_row(extra, "", "V") - if extras_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]CORE_EXTRAS_DEPENDENCIES[/bold]" -sections in the setup file: [bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of extras that: - - * are used but are not documented, - * are documented but not used, - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(extras_table) - return False - return True - - -def check_deprecated_extras(console: Console) -> bool: - """ - Checks if deprecated extras match setup vs. doc. - :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - deprecated_setup_extras = EXTRAS_DEPRECATED_ALIASES - deprecated_docs_extras = get_deprecated_extras_from_docs() - - deprecated_extras_table = Table() - deprecated_extras_table.add_column("DEPRECATED_IN_SETUP", justify="right", style="cyan") - deprecated_extras_table.add_column("TARGET_IN_SETUP", justify="center", style="magenta") - deprecated_extras_table.add_column("DEPRECATED_IN_DOCS", justify="right", style="cyan") - deprecated_extras_table.add_column("TARGET_IN_DOCS", justify="center", style="magenta") - - for extra in deprecated_setup_extras.keys(): - if extra not in deprecated_docs_extras: - deprecated_extras_table.add_row(extra, deprecated_setup_extras[extra], "", "") - elif deprecated_docs_extras[extra] != deprecated_setup_extras[extra]: - deprecated_extras_table.add_row( - extra, deprecated_setup_extras[extra], extra, deprecated_docs_extras[extra] - ) - - for extra in deprecated_docs_extras.keys(): - if extra not in deprecated_setup_extras: - deprecated_extras_table.add_row("", "", extra, deprecated_docs_extras[extra]) - - if deprecated_extras_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]EXTRAS_DEPRECATED_ALIASES[/bold]" section in the setup file:\ -[bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of deprecated extras that: - - * are used but are not documented, - * are documented but not used, - * or have different target extra specified in the documentation or setup. - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(deprecated_extras_table) - return False - return True - - -def check_preinstalled_extras(console: Console) -> bool: - """ - Checks if preinstalled extras match setup vs. doc. 
- :param console: print table there in case of errors - :return: True if all ok, False otherwise - """ - preinstalled_providers_from_docs = get_preinstalled_providers_from_docs() - preinstalled_providers_from_setup = [provider.split(">=")[0] for provider in PREINSTALLED_PROVIDERS] - - preinstalled_providers_table = Table() - preinstalled_providers_table.add_column("PREINSTALLED_IN_SETUP", justify="right", style="cyan") - preinstalled_providers_table.add_column("PREINSTALLED_IN_DOCS", justify="center", style="magenta") - - for provider in preinstalled_providers_from_setup: - if provider not in preinstalled_providers_from_docs: - preinstalled_providers_table.add_row(provider, "") - - for provider in preinstalled_providers_from_docs: - if provider not in preinstalled_providers_from_setup: - preinstalled_providers_table.add_row("", provider) - - if preinstalled_providers_table.row_count != 0: - print( - f"""\ -[red bold]ERROR!![/red bold] - -The "[bold]PREINSTALLED_PROVIDERS[/bold]" section in the setup file:\ -[bold yellow]{SETUP_PY_FILE}[/bold yellow] -should be synchronized with the "Extra Packages Reference" -in the documentation file: [bold yellow]{DOCS_FILE}[/bold yellow]. - -Below is the list of preinstalled providers that: - * are used but are not documented, - * or are documented but not used. - -[bold]Please synchronize setup/documentation files![/bold] - -""" - ) - console.print(preinstalled_providers_table) - return False - return True - - -if __name__ == "__main__": - status: list[bool] = [] - # force adding all provider package dependencies, to check providers status - add_all_provider_packages() - main_console = Console() - status.append(check_extras(main_console)) - status.append(check_deprecated_extras(main_console)) - status.append(check_preinstalled_extras(main_console)) - - if all(status): - print("All extras are synchronized: [green]OK[/]") - sys.exit(0) - sys.exit(1) diff --git a/scripts/ci/pre_commit/pre_commit_compile_www_assets.py b/scripts/ci/pre_commit/pre_commit_compile_www_assets.py index 3365d1fb872ec..b98908d2fa1cc 100755 --- a/scripts/ci/pre_commit/pre_commit_compile_www_assets.py +++ b/scripts/ci/pre_commit/pre_commit_compile_www_assets.py @@ -20,6 +20,7 @@ import hashlib import os import re +import shutil import subprocess import sys from pathlib import Path @@ -53,14 +54,21 @@ def get_directory_hash(directory: Path, skip_path_regexp: str | None = None) -> if __name__ == "__main__": www_directory = AIRFLOW_SOURCES_PATH / "airflow" / "www" - WWW_HASH_FILE.parent.mkdir(exist_ok=True) - old_hash = WWW_HASH_FILE.read_text() if WWW_HASH_FILE.exists() else "" - new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") - if new_hash == old_hash: - print("The WWW directory has not changed! Skip regeneration.") - sys.exit(0) + node_modules_directory = www_directory / "node_modules" + dist_directory = www_directory / "static" / "dist" + if node_modules_directory.exists() and dist_directory.exists(): + WWW_HASH_FILE.parent.mkdir(exist_ok=True) + old_hash = WWW_HASH_FILE.read_text() if WWW_HASH_FILE.exists() else "" + new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") + if new_hash == old_hash: + print("The WWW directory has not changed! 
Skip regeneration.") + sys.exit(0) + else: + shutil.rmtree(node_modules_directory, ignore_errors=True) + shutil.rmtree(dist_directory, ignore_errors=True) env = os.environ.copy() env["FORCE_COLOR"] = "true" subprocess.check_call(["yarn", "install", "--frozen-lockfile"], cwd=os.fspath(www_directory)) subprocess.check_call(["yarn", "run", "build"], cwd=os.fspath(www_directory), env=env) + new_hash = get_directory_hash(www_directory, skip_path_regexp=r".*node_modules.*") WWW_HASH_FILE.write_text(new_hash) diff --git a/scripts/ci/pre_commit/pre_commit_insert_extras.py b/scripts/ci/pre_commit/pre_commit_insert_extras.py index f1a96b0a22e16..3c48f96b6307a 100755 --- a/scripts/ci/pre_commit/pre_commit_insert_extras.py +++ b/scripts/ci/pre_commit/pre_commit_insert_extras.py @@ -17,42 +17,88 @@ # under the License. from __future__ import annotations -import os import sys import textwrap +from enum import Enum from pathlib import Path -AIRFLOW_SOURCES_DIR = Path(__file__).parents[3].resolve() +# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files +try: + import tomllib +except ImportError: + import tomli as tomllib + +AIRFLOW_ROOT_PATH = Path(__file__).parents[3].resolve() +PYPROJECT_TOML_FILE_PATH = AIRFLOW_ROOT_PATH / "pyproject.toml" sys.path.insert(0, str(Path(__file__).parent.resolve())) # make sure common_precommit_utils is imported -sys.path.insert(0, str(AIRFLOW_SOURCES_DIR)) # make sure setup is imported from Airflow -# flake8: noqa: F401 +from common_precommit_utils import insert_documentation -os.environ["_SKIP_PYTHON_VERSION_CHECK"] = "true" -from common_precommit_utils import insert_documentation +class ExtraType(Enum): + DEVEL = "DEVEL" + DOC = "DOC" + EDITABLE = "EDITABLE" + STANDARD = "STANDARD" + + +def get_header_and_footer(extra_type: ExtraType, file_format: str) -> tuple[str, str]: + if file_format == "rst": + return f" .. START {extra_type.value} EXTRAS HERE", f" .. END {extra_type.value} EXTRAS HERE" + elif file_format == "txt": + return f"# START {extra_type.value} EXTRAS HERE", f"# END {extra_type.value} EXTRAS HERE" + else: + raise Exception(f"Bad format {format} passed. Only rst and txt are supported") + + +def get_wrapped_list(extras_set: set[str]) -> list[str]: + return [line + "\n" for line in textwrap.wrap(", ".join(sorted(extras_set)), 100)] + + +def get_extra_types_dict(extras: dict[str, list[str]]) -> dict[ExtraType, tuple[set[str], list[str]]]: + """ + Split extras into four types. 
+ + :return: dictionary of extra types with tuple of two set,list - set of extras and text-wrapped list + """ + extra_type_dict: dict[ExtraType, tuple[set[str], list[str]]] = {} + + for extra_type in ExtraType: + extra_type_dict[extra_type] = (set(), []) + + for key, value in extras.items(): + if key.startswith("devel"): + extra_type_dict[ExtraType.DEVEL][0].add(key) + elif key in ["doc", "doc_gen"]: + extra_type_dict[ExtraType.DOC][0].add(key) + elif key.startswith("editable"): + extra_type_dict[ExtraType.EDITABLE][0].add(key) + else: + extra_type_dict[ExtraType.STANDARD][0].add(key) + + for extra_type in ExtraType: + extra_type_dict[extra_type][1].extend(get_wrapped_list(extra_type_dict[extra_type][0])) + + return extra_type_dict + + +def get_extras_from_pyproject_toml() -> dict[str, list[str]]: + pyproject_toml_content = tomllib.loads(PYPROJECT_TOML_FILE_PATH.read_text()) + return pyproject_toml_content["project"]["optional-dependencies"] -from setup import EXTRAS_DEPENDENCIES -sys.path.append(str(AIRFLOW_SOURCES_DIR)) +FILES_TO_UPDATE = [(AIRFLOW_ROOT_PATH / "INSTALL", "txt"), (AIRFLOW_ROOT_PATH / "CONTRIBUTING.rst", "rst")] -RST_HEADER = " .. START EXTRAS HERE" -RST_FOOTER = " .. END EXTRAS HERE" -INSTALL_HEADER = "# START EXTRAS HERE" -INSTALL_FOOTER = "# END EXTRAS HERE" +def process_documentation_files(): + extra_type_dict = get_extra_types_dict(get_extras_from_pyproject_toml()) + for file, file_format in FILES_TO_UPDATE: + if not file.exists(): + raise Exception(f"File {file} does not exist") + for extra_type in ExtraType: + header, footer = get_header_and_footer(extra_type, file_format) + insert_documentation(file, extra_type_dict[extra_type][1], header, footer) -CONSTANTS_HEADER = "# START EXTRAS HERE" -CONSTANTS_FOOTER = "# END EXTRAS HERE" if __name__ == "__main__": - install_file_path = AIRFLOW_SOURCES_DIR / "INSTALL" - contributing_file_path = AIRFLOW_SOURCES_DIR / "CONTRIBUTING.rst" - global_constants_file_path = ( - AIRFLOW_SOURCES_DIR / "dev" / "breeze" / "src" / "airflow_breeze" / "global_constants.py" - ) - extras_list = textwrap.wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100) - extras_list = [line + "\n" for line in extras_list] - extras_code = [f" {extra}\n" for extra in EXTRAS_DEPENDENCIES.keys()] - insert_documentation(install_file_path, extras_list, INSTALL_HEADER, INSTALL_FOOTER) - insert_documentation(contributing_file_path, extras_list, RST_HEADER, RST_FOOTER) + process_documentation_files() diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py index 5fa638505fece..610844a35e047 100755 --- a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py @@ -17,17 +17,26 @@ # under the License. from __future__ import annotations +import hashlib import json import os import sys from ast import Import, ImportFrom, NodeVisitor, parse from collections import defaultdict +from enum import Enum from pathlib import Path from typing import Any, List import yaml from rich.console import Console +# tomllib is available in Python 3.11+ and before that tomli offers same interface for parsing TOML files +try: + import tomllib +except ImportError: + import tomli as tomllib + + console = Console(color_system="standard", width=200) AIRFLOW_PROVIDERS_IMPORT_PREFIX = "airflow.providers." 
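The tomllib/tomli import fallback above is shared by all of the rewritten hooks. As a rough, standalone sketch of what that pattern gives them (not part of the patch; it assumes it is run from the Airflow source root, where pyproject.toml carries the [project.optional-dependencies] table shown earlier):

    # Illustrative only -- mirrors how the hooks above load the extras table.
    from __future__ import annotations

    from pathlib import Path

    try:
        import tomllib  # standard library from Python 3.11 onwards
    except ImportError:
        import tomli as tomllib  # same loads() API on older interpreters

    pyproject = tomllib.loads(Path("pyproject.toml").read_text())
    extras: dict[str, list[str]] = pyproject["project"]["optional-dependencies"]

    # The deprecated "druid" alias, for example, should expand to the real provider extra.
    print(len(extras), "extras defined")
    print(extras.get("druid"))  # expected: ["apache-airflow[apache_druid]"]

This is a sketch only; the real hooks resolve the path from the script location (AIRFLOW_ROOT_PATH / AIRFLOW_SOURCES_ROOT) rather than from the current working directory.
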
@@ -40,6 +49,12 @@ DEPENDENCIES_JSON_FILE_PATH = AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json" +PYPROJECT_TOML_FILE_PATH = AIRFLOW_SOURCES_ROOT / "pyproject.toml" + +MY_FILE = Path(__file__).resolve() +MY_MD5SUM_FILE = MY_FILE.parent / MY_FILE.name.replace(".py", ".py.md5sum") + + sys.path.insert(0, str(AIRFLOW_SOURCES_ROOT)) # make sure setup is imported from Airflow warnings: list[str] = [] @@ -175,6 +190,156 @@ def check_if_different_provider_used(file_path: Path) -> None: STATES: dict[str, str] = {} +FOUND_EXTRAS: dict[str, list[str]] = defaultdict(list) + + +class ParsedDependencyTypes(Enum): + CORE_EXTRAS = "core extras" + APACHE_NO_PROVIDER_EXTRAS = "Apache no provider extras" + DEVEL_EXTRAS = "devel extras" + DOC_EXTRAS = "doc extras" + BUNDLE_EXTRAS = "bundle extras" + DEPRECATED_EXTRAS = "deprecated extras" + MANUAL_EXTRAS = "manual extras" + + +GENERATED_DEPENDENCIES_START = "# START OF GENERATED DEPENDENCIES" +GENERATED_DEPENDENCIES_END = "# END OF GENERATED DEPENDENCIES" + + +def normalize_extra(dependency: str) -> str: + return dependency.replace(".", "_").replace("-", "_") + + +def normalize_package_name(dependency: str) -> str: + return f"apache-airflow-providers-" f"{dependency.replace('.', '-').replace('_', '-')}" + + +def convert_provider_dependency_to_devel(dep): + provider_reminder = dep.replace("apache-airflow-providers-", "") + if ">=" in provider_reminder: + provider_reminder = provider_reminder.split(">=")[0] + dep = provider_reminder.replace("-", "_").replace(".", "_") + dep = f"apache-airflow[editable_{dep}]" + return dep + + +def generate_dependencies( + result_content: list[str], + pyproject_toml_content: dict[str, Any], + dependencies: dict[str, dict[str, list[str] | str]], +): + def generate_parsed_extras(type: ParsedDependencyTypes): + result_content.append(f" # {type.value}") + for extra in FOUND_EXTRAS[type.value]: + result_content.append(f' "apache-airflow[{extra}]",') + + def get_python_exclusion(dependency_info: dict[str, list[str] | str]): + excluded_python_versions = dependency_info.get("excluded-python-versions") + exclusion = "" + if excluded_python_versions: + separator = ";" + for version in excluded_python_versions: + exclusion += f'{separator}python_version != \\"{version}\\"' + separator = " and " + return exclusion + + for dependency, dependency_info in dependencies.items(): + if dependency_info["state"] in ["suspended", "removed"]: + continue + result_content.append(f"{normalize_extra(dependency)} = [") + result_content.append( + f' "{normalize_package_name(dependency)}' f'{get_python_exclusion(dependency_info)}",' + ) + result_content.append("]") + result_content.append("all = [") + generate_parsed_extras(ParsedDependencyTypes.CORE_EXTRAS) + generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) + result_content.append(" # Provider extras") + for dependency, dependency_info in dependencies.items(): + result_content.append(f' "apache-airflow[{normalize_extra(dependency)}]",') + result_content.append("]") + for dependency, dependency_info in dependencies.items(): + result_content.append(f"editable_{normalize_extra(dependency)} = [") + for dep in dependency_info["deps"]: + if dep.startswith("apache-airflow-providers-"): + dep = convert_provider_dependency_to_devel(dep) + elif dep.startswith("apache-airflow>="): + continue + result_content.append(f' "{dep}{get_python_exclusion(dependency_info)}",') + result_content.append("]") + result_content.append("devel_all = [") + result_content.append(' 
"apache-airflow[devel]",') + result_content.append(' "apache-airflow[doc]",') + result_content.append(' "apache-airflow[doc_gen]",') + result_content.append(' "apache-airflow[saml]",') + generate_parsed_extras(ParsedDependencyTypes.APACHE_NO_PROVIDER_EXTRAS) + result_content.append(" # Include all manually added devel extras") + all_devel_deps = [ + dep + for dep in pyproject_toml_content["project"]["optional-dependencies"].keys() + if dep.startswith("devel_") and dep not in ["devel_all", "devel_ci", "devel_hadoop"] + ] + result_content.append(" # Include all provider deps") + for devel_dep in all_devel_deps: + result_content.append(f' "apache-airflow[{devel_dep}]",') + for dependency, dependency_info in dependencies.items(): + result_content.append(f' "apache-airflow[editable_{normalize_extra(dependency)}]",') + result_content.append("]") + + +def get_dependency_type(dependency_type: str) -> ParsedDependencyTypes | None: + for dep_type in ParsedDependencyTypes: + if dep_type.value == dependency_type: + return dep_type + return None + + +def update_pyproject_toml(dependencies: dict[str, dict[str, list[str] | str]]): + file_content = PYPROJECT_TOML_FILE_PATH.read_text() + pyproject_toml_content = tomllib.loads(file_content) + result_content: list[str] = [] + copying = True + current_type: str | None = None + line_count: int = 0 + for line in file_content.splitlines(): + if copying: + result_content.append(line) + if line.strip().startswith(GENERATED_DEPENDENCIES_START): + copying = False + generate_dependencies(result_content, pyproject_toml_content, dependencies) + elif line.strip().startswith(GENERATED_DEPENDENCIES_END): + copying = True + result_content.append(line) + elif line.strip().startswith("# START OF "): + current_type = line.strip().replace("# START OF ", "") + type_enum = get_dependency_type(current_type) + if type_enum is None: + console.print( + f"[red]Wrong start of section '{current_type}' in {PYPROJECT_TOML_FILE_PATH} " + f"at line {line_count}: Unknown section type" + ) + sys.exit(1) + elif line.strip().startswith("# END OF "): + end_type = line.strip().replace("# END OF ", "") + if end_type != current_type: + console.print( + f"[red]Wrong end of section {end_type} in {PYPROJECT_TOML_FILE_PATH} at line {line_count}" + ) + sys.exit(1) + if current_type: + if line.strip().endswith(" = ["): + FOUND_EXTRAS[current_type].append(line.split(" = [")[0].strip()) + line_count += 1 + PYPROJECT_TOML_FILE_PATH.write_text("\n".join(result_content) + "\n") + + +def calculate_my_hash(): + my_file = MY_FILE.resolve() + hash_md5 = hashlib.md5() + hash_md5.update(my_file.read_bytes()) + return hash_md5.hexdigest() + if __name__ == "__main__": find_all_providers_and_provider_files() @@ -210,9 +375,13 @@ def check_if_different_provider_used(file_path: Path) -> None: console.print("[red]Errors found during verification. 
Exiting!") console.print() sys.exit(1) - old_dependencies = DEPENDENCIES_JSON_FILE_PATH.read_text() + old_dependencies = ( + DEPENDENCIES_JSON_FILE_PATH.read_text() if DEPENDENCIES_JSON_FILE_PATH.exists() else "{}" + ) new_dependencies = json.dumps(unique_sorted_dependencies, indent=2) + "\n" - if new_dependencies != old_dependencies: + old_md5sum = MY_MD5SUM_FILE.read_text().strip() if MY_MD5SUM_FILE.exists() else "" + new_md5sum = calculate_my_hash() + if new_dependencies != old_dependencies or new_md5sum != old_md5sum: DEPENDENCIES_JSON_FILE_PATH.write_text(json.dumps(unique_sorted_dependencies, indent=2) + "\n") if os.environ.get("CI"): console.print() @@ -231,6 +400,8 @@ def check_if_different_provider_used(file_path: Path) -> None: ) console.print(f"[info]Written {DEPENDENCIES_JSON_FILE_PATH}") console.print() + update_pyproject_toml(unique_sorted_dependencies) + MY_MD5SUM_FILE.write_text(new_md5sum + "\n") sys.exit(1) else: console.print( diff --git a/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum new file mode 100644 index 0000000000000..045e034a1ae24 --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_update_providers_dependencies.py.md5sum @@ -0,0 +1 @@ +9f5a016cedd756918c1862ab474fc17f diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh index cf0ee85bab2e4..260e317630eed 100755 --- a/scripts/docker/entrypoint_ci.sh +++ b/scripts/docker/entrypoint_ci.sh @@ -231,7 +231,7 @@ function check_download_sqlalchemy() { if [[ ${DOWNGRADE_SQLALCHEMY=} != "true" ]]; then return fi - min_sqlalchemy_version=$(grep "sqlalchemy>=" setup.cfg | sed "s/.*>=\([0-9\.]*\).*/\1/") + min_sqlalchemy_version=$(grep "sqlalchemy>=" pyproject.toml | sed "s/.*>=\([0-9\.]*\).*/\1/") echo echo "${COLOR_BLUE}Downgrading sqlalchemy to minimum supported version: ${min_sqlalchemy_version}${COLOR_RESET}" echo diff --git a/scripts/docker/install_airflow.sh b/scripts/docker/install_airflow.sh index 56fec404074e4..8264d70d847ec 100644 --- a/scripts/docker/install_airflow.sh +++ b/scripts/docker/install_airflow.sh @@ -85,7 +85,7 @@ function install_airflow() { "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" \ --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" common::install_pip_version - # then upgrade if needed without using constraints to account for new limits in setup.py + # then upgrade if needed without using constraints to account for new limits in pyproject.toml pip install --root-user-action ignore --upgrade --upgrade-strategy only-if-needed \ ${ADDITIONAL_PIP_INSTALL_FLAGS} \ ${AIRFLOW_INSTALL_EDITABLE_FLAG} \ diff --git a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh index 26279c4bc85fb..9c809039c7df5 100644 --- a/scripts/docker/install_airflow_dependencies_from_branch_tip.sh +++ b/scripts/docker/install_airflow_dependencies_from_branch_tip.sh @@ -18,11 +18,11 @@ # shellcheck shell=bash disable=SC2086 # Installs Airflow from $AIRFLOW_BRANCH tip. This is pure optimisation. It is done because we do not want -# to reinstall all dependencies from scratch when setup.py changes. Problem with Docker caching is that +# to reinstall all dependencies from scratch when pyproject.toml changes. Problem with Docker caching is that # when a file is changed, when added to docker context, it invalidates the cache and it causes Docker # build to reinstall all dependencies from scratch. 
This can take a loooooot of time. Therefore we install # the dependencies first from main (and uninstall airflow right after) so that we can start installing -# deps from those pre-installed dependencies. It saves few minutes of build time when setup.py changes. +# deps from those pre-installed dependencies. It saves few minutes of build time when pyproject.toml changes. # # If INSTALL_MYSQL_CLIENT is set to false, mysql extra is removed # If INSTALL_POSTGRES_CLIENT is set to false, postgres extra is removed diff --git a/scripts/in_container/run_prepare_airflow_packages.py b/scripts/in_container/run_prepare_airflow_packages.py index 4b319a722c825..2e954382288b5 100755 --- a/scripts/in_container/run_prepare_airflow_packages.py +++ b/scripts/in_container/run_prepare_airflow_packages.py @@ -79,31 +79,34 @@ def process_summary(success_message: str, error_message: str, completed_process: rich.print("[bright_blue]Checking airflow version\n") airflow_version = subprocess.check_output( - [sys.executable, "setup.py", "--version"], text=True, cwd=AIRFLOW_SOURCES_ROOT + [sys.executable, "-m", "hatch", "version"], text=True, cwd=AIRFLOW_SOURCES_ROOT ).strip() rich.print(f"[green]Airflow version: {airflow_version}\n") RELEASED_VERSION_MATCHER = re.compile(r"^\d+\.\d+\.\d+$") -command = [sys.executable, "setup.py"] +build_command = [sys.executable, "hatch", "build"] if version_suffix: if RELEASED_VERSION_MATCHER.match(airflow_version): rich.print(f"[warning]Adding {version_suffix} suffix to the {airflow_version}") - command.extend(["egg_info", "--tag-build", version_suffix]) + sys.exit(2) elif not airflow_version.endswith(version_suffix): - rich.print(f"[red]Version {airflow_version} does not end with {version_suffix}. Using !") + rich.print( + f"[red]Version {airflow_version} does not end with {version_suffix} " + f"but with another one. Exiting!" + ) sys.exit(1) if package_format in ["both", "wheel"]: - command.append("bdist_wheel") + build_command.extend(["-t", "wheel"]) if package_format in ["both", "sdist"]: - command.append("sdist") + build_command.extend(["-t", "sdist"]) rich.print(f"[bright_blue]Building packages: {package_format}\n") -process = subprocess.run(command, capture_output=True, text=True, cwd=AIRFLOW_SOURCES_ROOT) +process = subprocess.run(build_command, capture_output=True, text=True, cwd=AIRFLOW_SOURCES_ROOT) process_summary("Airflow packages built successfully", "Error building Airflow packages", process) diff --git a/scripts/tools/initialize_virtualenv.py b/scripts/tools/initialize_virtualenv.py index e5e5633fdf22f..71a2916d8c1c1 100755 --- a/scripts/tools/initialize_virtualenv.py +++ b/scripts/tools/initialize_virtualenv.py @@ -74,8 +74,14 @@ def pip_install_requirements() -> int: You can specify extras as single coma-separated parameter to install. For example -* google,amazon,microsoft.azure -* devel_all +* devel - to have all development dependencies required to test core. 
+* devel_devscripts, devel_tests, devel_mypy - to selectively install tools that we use + to run scripts and tests +* editable_google,editable_amazon,editable_microsoft_azure - to install dependencies + needed at runtime by specified providers +* devel_azure, devel_deltalake - to install development tools needed by specific providers + to run some of their tests (those are dependencies that are not needed at runtime) +* devel_all - ti have all development dependencies required for all providers Note that "devel_all" installs all possible dependencies and we have > 600 of them, which might not be possible to install cleanly on your host because of lack of diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 9077a02915d70..0000000000000 --- a/setup.cfg +++ /dev/null @@ -1,239 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[metadata] -name = apache-airflow -summary = Programmatically author, schedule and monitor data pipelines -author = Apache Software Foundation -author_email = dev@airflow.apache.org -url = https://airflow.apache.org/ -version = attr: airflow.__version__ -long_description = file: generated/PYPI_README.md -long_description_content_type = text/markdown -license = Apache License 2.0 -license_files = - LICENSE - NOTICE - licenses/*.txt -classifiers = - Development Status :: 5 - Production/Stable - Environment :: Console - Environment :: Web Environment - Intended Audience :: Developers - Intended Audience :: System Administrators - License :: OSI Approved :: Apache Software License - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Topic :: System :: Monitoring - Framework :: Apache Airflow -project_urls = - Bug Tracker=https://github.com/apache/airflow/issues - Documentation=https://airflow.apache.org/docs/ - Downloads=https://archive.apache.org/dist/airflow/ - Release Notes=https://airflow.apache.org/docs/apache-airflow/stable/release_notes.html - Slack Chat=https://s.apache.org/airflow-slack - Source Code=https://github.com/apache/airflow - Twitter=https://twitter.com/ApacheAirflow - YouTube=https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/ - -[options] -zip_safe = False -include_package_data = True -# Mainly because of distutils deprecation and some packages not being compatible with it, we should -# Limit airflow to < 3.12 until those dependencies are ready and until we can support Python 3.12 -python_requires = ~=3.8,<3.12 -packages = find: -setup_requires = - gitpython - wheel -##################################################################################################### -# IMPORTANT NOTE!!!!!!!!!!!!!!! 
-# IF you are removing dependencies from this list, please make sure that you also increase -# DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci -##################################################################################################### -install_requires = - # Alembic is important to handle our migrations in predictable and performant way. It is developed - # together with SQLAlchemy. Our experience with Alembic is that it very stable in minor version - alembic>=1.6.3, <2.0 - argcomplete>=1.10 - asgiref - attrs>=22.1.0 - blinker - # Colorlog 6.x merges TTYColoredFormatter into ColoredFormatter, breaking backwards compatibility with 4.x - # Update CustomTTYColoredFormatter to remove - colorlog>=4.0.2, <5.0 - configupdater>=3.1.1 - # `airflow/www/extensions/init_views` imports `connexion.decorators.validation.RequestBodyValidator` - # connexion v3 has refactored the entire module to middleware, see: /spec-first/connexion/issues/1525 - # Specifically, RequestBodyValidator was removed in: /spec-first/connexion/pull/1595 - # The usage was added in #30596, seemingly only to override and improve the default error message. - # Either revert that change or find another way, preferably without using connexion internals. - # This limit can be removed after https://github.com/apache/airflow/issues/35234 is fixed - connexion[flask]>=2.10.0,<3.0 - cron-descriptor>=1.2.24 - croniter>=0.3.17 - cryptography>=0.9.3 - deprecated>=1.2.13 - dill>=0.2.2 - # Flask 2.3 is scheduled to introduce a number of deprecation removals - some of them might be breaking - # for our dependencies - notably `_app_ctx_stack` and `_request_ctx_stack` removals. - # We should remove the limitation after 2.3 is released and our dependencies are updated to handle it - flask>=2.2,<2.3 - # We are tightly coupled with FAB version because we vendored in part of FAB code related to security manager - # This is done as part of preparation to removing FAB as dependency, but we are not ready for it yet - # Every time we update FAB version here, please make sure that you review the classes and models in - # `airflow/www/fab_security` with their upstream counterparts. In particular, make sure any breaking changes, - # for example any new methods, are accounted for. - # NOTE! When you change the value here, you also have to update flask-appbuilder[oauth] in setup.py - flask-appbuilder==4.3.10 - flask-caching>=1.5.0 - # We should be able to remove flask-login. It is still used in core Airflow but in tests. Looks feasible. - flask-login>=0.6.2 - flask-session>=0.4.0 - flask-wtf>=0.15 - fsspec>=2023.10.0 - google-re2>=1.0 - graphviz>=0.12 - gunicorn>=20.1.0 - httpx - importlib_metadata>=1.7;python_version<"3.9" - importlib_resources>=5.2;python_version<"3.9" - itsdangerous>=2.0 - jinja2>=3.0.0 - jsonschema>=4.18.0 - lazy-object-proxy - linkify-it-py>=2.0.0 - lockfile>=0.12.2 - markdown>=3.0 - markdown-it-py>=2.1.0 - markupsafe>=1.1.1 - marshmallow-oneofschema>=2.0.1 - mdit-py-plugins>=0.3.0 - opentelemetry-api>=1.15.0 - opentelemetry-exporter-otlp - packaging>=14.0 - pathspec>=0.9.0 - # When (if) pendulum 3 released it would introduce changes in module/objects imports, - # since we are tightly coupled with pendulum library internally it will breaks Airflow functionality. 
- pendulum>=2.0,<3.0 - pluggy>=1.0 - psutil>=4.2.0 - pydantic>=2.3.0 - pygments>=2.0.1 - pyjwt>=2.0.0 - python-daemon>=3.0.0 - python-dateutil>=2.3 - python-nvd3>=0.15.0 - python-slugify>=5.0 - rfc3339_validator>=0.1.4 - rich>=12.4.4 - rich-argparse>=1.0.0 - setproctitle>=1.1.8 - # We use some deprecated features of sqlalchemy 2.0 and we should replace them before we can upgrade - # See https://sqlalche.me/e/b8d9 for details of deprecated features - # you can set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. - # The issue tracking it is https://github.com/apache/airflow/issues/28723 - sqlalchemy>=1.4.28,<2.0 - sqlalchemy_jsonfield>=1.0 - tabulate>=0.7.5 - tenacity>=6.2.0,!=8.2.0 - termcolor>=1.1.0 - typing-extensions>=4.0.0 - # We should remove this dependency when Providers are limited to Airflow 2.7+ - # as we replaced the usage of unicodecsv with csv in Airflow 2.7 - # See https://github.com/apache/airflow/pull/31693 - # We should also remove "licenses/LICENSE-unicodecsv.txt" file when we remove this dependency - unicodecsv>=0.14.1 - universal_pathlib>=0.1.4 - # Werkzug 3 breaks Flask-Login 0.6.2 - # we should remove this limitation when FAB supports Flask 2.3 - werkzeug>=2.0,<3 - -[options.packages.find] -include = - airflow* - -[options.package_data] -airflow= - py.typed - alembic.ini - git_version - customized_form_field_behaviours.schema.json - provider_info.schema.json - -airflow.api_connexion.openapi=*.yaml -airflow.providers.fab.auth_manager.openapi=*.yaml -airflow.serialization=*.json -airflow.utils= - context.pyi - -[options.data_files] -generated= - generated/provider_dependencies.json - -[options.entry_points] -console_scripts= - airflow=airflow.__main__:main - -[bdist_wheel] -python-tag=py3 - - -[files] -packages = airflow - -[easy_install] - -[mypy] -ignore_missing_imports = True -no_implicit_optional = True -warn_redundant_casts = True -warn_unused_ignores = False -plugins = - dev.mypy.plugin.decorators, - dev.mypy.plugin.outputs -pretty = True -show_error_codes = True -# Mypy since 0.991 warns about type annotations being present in an untyped -# function since they are not checked and technically meaningless. Ideally we -# should make them meaningful by setting check_untyped_defs = True, but there -# are too many issues in those currently unchecked functions to make it viable -# in the short term, so this error is simply ignored for now. 
-disable_error_code = annotation-unchecked - -[mypy-airflow.config_templates.default_webserver_config] -# This file gets written to user installs, so lets not litter it with type comments -disable_error_code = var-annotated - -[mypy-airflow.migrations.*] -ignore_errors = True - -# Let's assume all google.cloud packages have implicit optionals -# Most of them don't but even if they do, it does not matter -[mypy-google.cloud.*] -no_implicit_optional = False - -# Let's assume all azure packages have implicit optionals -[mypy-azure.*] -no_implicit_optional = False - - -[mypy-referencing.*] -# Referencing has some old type annotations that are not compatible with new versions of mypy -ignore_errors = True diff --git a/setup.cfg.back b/setup.cfg.back new file mode 100644 index 0000000000000..c135ae68c8a17 --- /dev/null +++ b/setup.cfg.back @@ -0,0 +1,29 @@ +license_files = + LICENSE + NOTICE + licenses/*.txt + +setup_requires = + gitpython + wheel + +[options.package_data] +airflow= + py.typed + alembic.ini + git_version + customized_form_field_behaviours.schema.json + provider_info.schema.json + +airflow.api_connexion.openapi=*.yaml +airflow.providers.fab.auth_manager.openapi=*.yaml +airflow.serialization=*.json +airflow.utils= + context.pyi + +[options.data_files] +generated= + generated/provider_dependencies.json + +[files] +packages = airflow diff --git a/setup.py b/setup.py deleted file mode 100644 index 4b43a0add9934..0000000000000 --- a/setup.py +++ /dev/null @@ -1,1085 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Setup.py for the Airflow project.""" -# To make sure the CI build is using "upgrade to newer dependencies", which is useful when you want to check -# if the dependencies are still compatible with the latest versions as they seem to break some unrelated -# tests in main, you can modify this file. The modification can be simply modifying this particular comment. -# e.g. you can modify the following number "00001" to something else to trigger it. -from __future__ import annotations - -import glob -import json -import logging -import os -import subprocess -import sys -import textwrap -import unittest -from copy import deepcopy -from pathlib import Path -from typing import Iterable - -from setuptools import Command, Distribution, find_namespace_packages, setup -from setuptools.command.develop import develop as develop_orig -from setuptools.command.install import install as install_orig - -# Setuptools patches this import to point to a vendored copy instead of the -# stdlib, which is deprecated in Python 3.10 and will be removed in 3.12. 
-from distutils import log # isort: skip - - -# Controls whether providers are installed from packages or directly from sources -# It is turned on by default in case of development environments such as Breeze -# And it is particularly useful when you add a new provider and there is no -# PyPI version to install the provider package from -INSTALL_PROVIDERS_FROM_SOURCES = "INSTALL_PROVIDERS_FROM_SOURCES" -PY39 = sys.version_info >= (3, 9) - -logger = logging.getLogger(__name__) - -AIRFLOW_SOURCES_ROOT = Path(__file__).parent.resolve() -PROVIDERS_ROOT = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" - -CROSS_PROVIDERS_DEPS = "cross-providers-deps" -DEPS = "deps" -CURRENT_PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" - - -def apply_pypi_suffix_to_airflow_packages(dependencies: list[str]) -> None: - """ - Apply version suffix to dependencies that do not have one. - - Looks through the list of dependencies, finds which one are airflow or airflow providers packages - and applies the version suffix to those of them that do not have the suffix applied yet. - - :param dependencies: list of dependencies to add suffix to - """ - for i in range(len(dependencies)): - dependency = dependencies[i] - if dependency.startswith("apache-airflow"): - # in case we want to depend on other airflow package, the chance is the package - # has not yet been released to PyPI and we only see it as a local package that is - # being installed with .dev0 suffix in CI. Unfortunately, there is no way in standard - # PEP-440 compliant way to specify version that would be both - releasable, and - # testable to install on CI with .dev0 or .rc suffixes. We could add `--pre` flag to - # enable it, but `--pre` flag is not selective and will work for all packages so - # we would automatically install all "pre-release" packages for all packages that - # we install from PyPI - and this is definitely not what we want. So in order to - # install only airflow packages that are available in sources in .dev0 or .rc version - # we need to dynamically modify the dependencies here. - if ">=" in dependency: - package, version = dependency.split(">=") - version_spec = f">={version}" - version_suffix = os.environ.get("VERSION_SUFFIX_FOR_PYPI") - if version_suffix and version_suffix not in version_spec: - version_spec += version_suffix - dependencies[i] = f"{package}{version_spec}" - - -# NOTE! IN Airflow 2.4.+ dependencies for providers are maintained in `provider.yaml` files for each -# provider separately. They are loaded here and if you want to modify them, you need to modify -# corresponding provider.yaml file. 
-# -def fill_provider_dependencies() -> dict[str, dict[str, list[str]]]: - # in case we are loading setup from pre-commits, we want to skip the check for python version - # because if someone uses a version of Python where providers are excluded, the setup will fail - # to see the extras for those providers - skip_python_version_check = os.environ.get("_SKIP_PYTHON_VERSION_CHECK") - try: - with AIRFLOW_SOURCES_ROOT.joinpath("generated", "provider_dependencies.json").open() as f: - dependencies = json.load(f) - provider_dict = {} - for key, value in dependencies.items(): - if value["state"] in ["suspended", "removed"]: - continue - if value.get(DEPS): - apply_pypi_suffix_to_airflow_packages(value[DEPS]) - if CURRENT_PYTHON_VERSION not in value["excluded-python-versions"] or skip_python_version_check: - provider_dict[key] = value - return provider_dict - except Exception as e: - print(f"Exception while loading provider dependencies {e}") - # we can ignore loading dependencies when they are missing - they are only used to generate - # correct extras when packages are build and when we install airflow from sources - # (in both cases the provider_dependencies should be present). - return {} - - -PROVIDER_DEPENDENCIES = fill_provider_dependencies() - - -def airflow_test_suite() -> unittest.TestSuite: - """Test suite for Airflow tests.""" - test_loader = unittest.TestLoader() - test_suite = test_loader.discover(str(AIRFLOW_SOURCES_ROOT / "tests"), pattern="test_*.py") - return test_suite - - -class CleanCommand(Command): - """ - Command to tidy up the project root. - - Registered as cmdclass in setup() so it can be called with ``python setup.py extra_clean``. - """ - - description = "Tidy up the project root" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - @staticmethod - def rm_all_files(files: list[str]) -> None: - """Remove all files from the list.""" - for file in files: - try: - os.remove(file) - except Exception as e: - logger.warning("Error when removing %s: %s", file, e) - - def run(self) -> None: - """Remove temporary files and directories.""" - os.chdir(str(AIRFLOW_SOURCES_ROOT)) - self.rm_all_files(glob.glob("./build/*")) - self.rm_all_files(glob.glob("./**/__pycache__/*", recursive=True)) - self.rm_all_files(glob.glob("./**/*.pyc", recursive=True)) - self.rm_all_files(glob.glob("./dist/*")) - self.rm_all_files(glob.glob("./*.egg-info")) - self.rm_all_files(glob.glob("./docker-context-files/*.whl")) - self.rm_all_files(glob.glob("./docker-context-files/*.tgz")) - - -class CompileAssets(Command): - """ - Compile and build the frontend assets using yarn and webpack. - - Registered as cmdclass in setup() so it can be called with ``python setup.py compile_assets``. - """ - - description = "Compile and build the frontend assets" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """Run a command to compile and build assets.""" - www_dir = AIRFLOW_SOURCES_ROOT / "airflow" / "www" - subprocess.check_call(["yarn", "install", "--frozen-lockfile"], cwd=str(www_dir)) - subprocess.check_call(["yarn", "run", "build"], cwd=str(www_dir)) - - -class ListExtras(Command): - """ - List all available extras. 
- - Registered as cmdclass in setup() so it can be called with ``python setup.py list_extras``. - """ - - description = "List available extras" - user_options: list[str] = [] - - def initialize_options(self) -> None: - """Set default values for options.""" - - def finalize_options(self) -> None: - """Set final values for options.""" - - def run(self) -> None: - """List extras.""" - print("\n".join(textwrap.wrap(", ".join(EXTRAS_DEPENDENCIES.keys()), 100))) - - -def git_version() -> str: - """ - Return a version to identify the state of the underlying git repo. - - The version will indicate whether the head of the current git-backed working directory - is tied to a release tag or not : it will indicate the former with a 'release:{version}' - prefix and the latter with a '.dev0' suffix. Following the prefix will be a sha of the - current branch head. Finally, a "dirty" suffix is appended to indicate that uncommitted - changes are present. - - :return: Found Airflow version in Git repo - """ - try: - import git - - try: - repo = git.Repo(str(AIRFLOW_SOURCES_ROOT / ".git")) - except git.NoSuchPathError: - logger.warning(".git directory not found: Cannot compute the git version") - return "" - except git.InvalidGitRepositoryError: - logger.warning("Invalid .git directory not found: Cannot compute the git version") - return "" - except ImportError: - logger.warning("gitpython not found: Cannot compute the git version.") - return "" - if repo: - sha = repo.head.commit.hexsha - if repo.is_dirty(): - return f".dev0+{sha}.dirty" - # commit is clean - return f".release:{sha}" - return "no_git_version" - - -def write_version(filename: str = str(AIRFLOW_SOURCES_ROOT / "airflow" / "git_version")) -> None: - """ - Write the Semver version + git hash to file, e.g. ".dev0+2f635dc265e78db6708f59f68e8009abb92c1e65". - - :param str filename: Destination file to write. - """ - text = git_version() - with open(filename, "w") as file: - file.write(text) - - -# -# NOTE! IN Airflow 2.4.+ dependencies for providers are maintained in `provider.yaml` files for each -# provider separately. Before, the provider dependencies were kept here. THEY ARE NOT HERE ANYMORE. -# -# 'Start dependencies group' and 'End dependencies group' are marks for ./scripts/ci/check_order_setup.py -# If you change these marks you should also change ./scripts/ci/check_order_setup.py -# Start dependencies group -async_packages = [ - "eventlet>=0.33.3", - "gevent>=0.13", - "greenlet>=0.4.9", -] -atlas = [ - "atlasclient>=0.1.2", -] -celery = [ - # The Celery is known to introduce problems when upgraded to a MAJOR version. Airflow Core - # Uses Celery for CeleryExecutor, and we also know that Kubernetes Python client follows SemVer - # (https://docs.celeryq.dev/en/stable/contributing.html?highlight=semver#versions). - # This is a crucial component of Airflow, so we should limit it to the next MAJOR version and only - # deliberately bump the version when we tested it, and we know it can be bumped. - # Bumping this version should also be connected with - # limiting minimum airflow version supported in celery provider due to the - # potential breaking changes in Airflow Core as well (celery is added as extra, so Airflow - # core is not hard-limited via install-requires, only by extra). 
- "celery>=5.3.0,<6" -] -cgroups = [ - # Cgroupspy 0.2.2 added Python 3.10 compatibility - "cgroupspy>=0.2.2", -] -deprecated_api = [ - "requests>=2.26.0", -] -doc = [ - # sphinx-autoapi fails with astroid 3.0, see: https://github.com/readthedocs/sphinx-autoapi/issues/407 - # This was fixed in sphinx-autoapi 3.0, however it has requirement sphinx>=6.1, but we stuck on 5.x - "astroid>=2.12.3, <3.0", - "checksumdir", - # click 8.1.4 and 8.1.5 generate mypy errors due to typing issue in the upstream package: - # https://github.com/pallets/click/issues/2558 - "click>=8.0,!=8.1.4,!=8.1.5", - # Docutils 0.17.0 converts generated
<div class="section"> into <section> and breaks our doc formatting - # By adding a lot of whitespace separation. This limit can be lifted when we update our doc to handle - # <section>
tags for sections - "docutils<0.17.0", - "eralchemy2", - "sphinx-airflow-theme", - "sphinx-argparse>=0.1.13", - "sphinx-autoapi>=2.0.0", - "sphinx-copybutton", - "sphinx-design>=0.5.0", - "sphinx-jinja>=2.0", - "sphinx-rtd-theme>=0.1.6", - "sphinx>=5.2.0", - "sphinxcontrib-httpdomain>=1.7.0", - "sphinxcontrib-redoc>=1.6.0", - "sphinxcontrib-spelling>=7.3", -] -doc_gen = [ - "eralchemy2", -] -flask_appbuilder_oauth = [ - "authlib>=1.0.0", - # The version here should be upgraded at the same time as flask-appbuilder in setup.cfg - "flask-appbuilder[oauth]==4.3.10", -] -kerberos = [ - "pykerberos>=1.1.13", - "requests_kerberos>=0.10.0", - "thrift_sasl>=0.2.0", -] -kubernetes = [ - # The Kubernetes API is known to introduce problems when upgraded to a MAJOR version. Airflow Core - # Uses Kubernetes for Kubernetes executor, and we also know that Kubernetes Python client follows SemVer - # (https://github.com/kubernetes-client/python#compatibility). This is a crucial component of Airflow - # So we should limit it to the next MAJOR version and only deliberately bump the version when we - # tested it, and we know it can be bumped. Bumping this version should also be connected with - # limiting minimum airflow version supported in cncf.kubernetes provider, due to the - # potential breaking changes in Airflow Core as well (kubernetes is added as extra, so Airflow - # core is not hard-limited via install-requires, only by extra). - "cryptography>=2.0.0", - "kubernetes>=21.7.0,<24", -] -ldap = [ - "ldap3>=2.5.1", - "python-ldap", -] -leveldb = ["plyvel"] -otel = ["opentelemetry-exporter-prometheus"] -pandas = [ - "pandas>=0.17.1", - # Use pyarrow-hotfix to fix https://nvd.nist.gov/vuln/detail/CVE-2023-47248. - # We should remove it once Apache Beam frees us to upgrade to pyarrow 14.0.1 - "pyarrow-hotfix", - "pyarrow>=9.0.0", -] -password = [ - "bcrypt>=2.0.0", - "flask-bcrypt>=0.7.1", -] -rabbitmq = [ - "amqp", -] -sentry = [ - "blinker>=1.1", - # Sentry SDK 1.33 is broken when greenlets are installed and fails to import - # See https://github.com/getsentry/sentry-python/issues/2473 - "sentry-sdk>=1.32.0,!=1.33.0", -] -statsd = [ - "statsd>=3.3.0", -] -virtualenv = [ - "virtualenv", -] -webhdfs = [ - "hdfs[avro,dataframe,kerberos]>=2.0.4", -] -# End dependencies group - -# Mypy 0.900 and above ships only with stubs from stdlib so if we need other stubs, we need to install them -# manually as `types-*`. See https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports -# for details. We want to install them explicitly because we want to eventually move to -# mypyd which does not support installing the types dynamically with --install-types -mypy_dependencies = [ - # TODO: upgrade to newer versions of MyPy continuously as they are released - # Make sure to upgrade the mypy version in update-common-sql-api-stubs in .pre-commit-config.yaml - # when you upgrade it here !!!! 
- "mypy==1.2.0", - "types-aiofiles", - "types-certifi", - "types-croniter", - "types-Deprecated", - "types-docutils", - "types-paramiko", - "types-protobuf", - "types-python-dateutil", - "types-python-slugify", - "types-pytz", - "types-redis", - "types-requests", - "types-setuptools", - "types-termcolor", - "types-tabulate", - "types-toml", - "types-Markdown", - "types-PyMySQL", - "types-PyYAML", -] - -# make sure to update providers/amazon/provider.yaml botocore min version when you update it here -_MIN_BOTO3_VERSION = "1.28.0" - -_devel_only_amazon = [ - "aws_xray_sdk", - "moto[cloudformation,glue]>=4.2.9", - f"mypy-boto3-rds>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-redshift-data>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-s3>={_MIN_BOTO3_VERSION}", - f"mypy-boto3-appflow>={_MIN_BOTO3_VERSION}", -] - -_devel_only_azure = [ - "pywinrm", -] - -_devel_only_breeze = [ - "filelock", -] - -_devel_only_debuggers = [ - "ipdb", -] - -_devel_only_deltalake = [ - "deltalake>=0.12.0", -] - -_devel_only_devscripts = [ - "click>=8.0", - "gitpython", - "pipdeptree", - "pygithub", - "rich-click>=1.7.0", - "restructuredtext-lint", - "semver", - "towncrier", - "twine", - "wheel", -] - -_devel_only_duckdb = [ - "duckdb>=0.9.0", -] - -_devel_only_mongo = [ - "mongomock", -] - -_devel_only_iceberg = [ - "pyiceberg>=0.5.0", -] - -_devel_only_sentry = [ - "blinker", -] - -_devel_only_static_checks = [ - "pre-commit", - "black", - "ruff>=0.0.219", - "yamllint", -] - -_devel_only_tests = [ - "aioresponses", - "backports.zoneinfo>=0.2.1;python_version<'3.9'", - "beautifulsoup4>=4.7.1", - "coverage>=7.2", - "pytest>=7.1", - # Pytest-asyncio 0.23.0 and 0.23.1 break test collection - # See https://github.com/pytest-dev/pytest-asyncio/issues/703 for details. - "pytest-asyncio!=0.23.0,!=0.23.1", - "pytest-cov", - "pytest-httpx", - "pytest-icdiff", - "pytest-instafail", - "pytest-mock", - "pytest-rerunfailures", - "pytest-timeouts", - "pytest-xdist", - "requests_mock", - "time-machine", -] - -# Dependencies needed for development only -devel_only = [ - *_devel_only_amazon, - *_devel_only_azure, - *_devel_only_breeze, - *_devel_only_debuggers, - *_devel_only_deltalake, - *_devel_only_devscripts, - *_devel_only_duckdb, - *_devel_only_mongo, - *_devel_only_iceberg, - *_devel_only_sentry, - *_devel_only_static_checks, - *_devel_only_tests, -] - -aiobotocore = [ - # This required for AWS deferrable operators. - # There is conflict between boto3 and aiobotocore dependency botocore. - # TODO: We can remove it once boto3 and aiobotocore both have compatible botocore version or - # boto3 have native aync support and we move away from aio aiobotocore - "aiobotocore>=2.1.1", -] - -s3fs = [ - # This is required for support of S3 file system which uses aiobotocore - # which can have a conflict with boto3 as mentioned above - "s3fs>=2023.10.0", -] - -saml = [ - # This is required for support of SAML which might be used by some providers (e.g. 
Amazon) - "python3-saml>=1.16.0", -] - - -def get_provider_dependencies(provider_name: str) -> list[str]: - if provider_name not in PROVIDER_DEPENDENCIES: - return [] - return PROVIDER_DEPENDENCIES[provider_name][DEPS] - - -def get_unique_dependency_list(req_list_iterable: Iterable[list[str]]): - _all_reqs: set[str] = set() - for req_list in req_list_iterable: - for req in req_list: - _all_reqs.add(req) - return list(_all_reqs) - - -devel = get_unique_dependency_list( - [ - aiobotocore, - cgroups, - devel_only, - doc, - kubernetes, - mypy_dependencies, - get_provider_dependencies("mysql"), - pandas, - password, - s3fs, - saml, - ] -) - -devel_hadoop = get_unique_dependency_list( - [ - devel, - get_provider_dependencies("apache.hdfs"), - get_provider_dependencies("apache.hive"), - get_provider_dependencies("apache.hdfs"), - get_provider_dependencies("apache.hive"), - get_provider_dependencies("apache.impala"), - kerberos, - get_provider_dependencies("presto"), - webhdfs, - ] -) - -# Those are all additional extras which do not have their own 'providers' -# The 'apache.atlas' and 'apache.webhdfs' are extras that provide additional libraries -# but they do not have separate providers (yet?), they are merely there to add extra libraries -# That can be used in custom python/bash operators. -ADDITIONAL_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { - "apache.atlas": atlas, - "apache.webhdfs": webhdfs, -} - -# Those are extras that are extensions of the 'core' Airflow. They provide additional features -# To airflow core. They do not have separate providers because they do not have any operators/hooks etc. -CORE_EXTRAS_DEPENDENCIES: dict[str, list[str]] = { - "aiobotocore": aiobotocore, - "async": async_packages, - "celery": celery, # TODO: remove and move to a regular provider package in a separate PR - "cgroups": cgroups, - "cncf.kubernetes": kubernetes, # TODO: remove and move to a regular provider package in a separate PR - "deprecated_api": deprecated_api, - "github_enterprise": flask_appbuilder_oauth, - "google_auth": flask_appbuilder_oauth, - "kerberos": kerberos, - "ldap": ldap, - "leveldb": leveldb, - "otel": otel, - "pandas": pandas, - "password": password, - "rabbitmq": rabbitmq, - "s3fs": s3fs, - "saml": saml, - "sentry": sentry, - "statsd": statsd, - "virtualenv": virtualenv, -} - - -def filter_out_excluded_extras() -> Iterable[tuple[str, list[str]]]: - for key, value in CORE_EXTRAS_DEPENDENCIES.items(): - if value: - yield key, value - else: - print(f"Removing extra {key} as it has been excluded") - - -CORE_EXTRAS_DEPENDENCIES = dict(filter_out_excluded_extras()) - -EXTRAS_DEPENDENCIES: dict[str, list[str]] = deepcopy(CORE_EXTRAS_DEPENDENCIES) - - -def add_extras_for_all_providers() -> None: - for provider_name, provider_dict in PROVIDER_DEPENDENCIES.items(): - EXTRAS_DEPENDENCIES[provider_name] = provider_dict[DEPS] - - -def add_additional_extras() -> None: - for extra_name, extra_dependencies in ADDITIONAL_EXTRAS_DEPENDENCIES.items(): - EXTRAS_DEPENDENCIES[extra_name] = extra_dependencies - - -add_extras_for_all_providers() -add_additional_extras() - -############################################################################################################# -# The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series -############################################################################################################# - -# Dictionary of aliases from 1.10 - deprecated in Airflow 2.* -EXTRAS_DEPRECATED_ALIASES: dict[str, str] = { - 
"atlas": "apache.atlas", - "aws": "amazon", - "azure": "microsoft.azure", - "cassandra": "apache.cassandra", - "crypto": "", # this is legacy extra - all dependencies are already "install-requires" - "druid": "apache.druid", - "gcp": "google", - "gcp_api": "google", - "hdfs": "apache.hdfs", - "hive": "apache.hive", - "kubernetes": "cncf.kubernetes", - "mssql": "microsoft.mssql", - "pinot": "apache.pinot", - "s3": "amazon", - "spark": "apache.spark", - "webhdfs": "apache.webhdfs", - "winrm": "microsoft.winrm", -} - -EXTRAS_DEPRECATED_ALIASES_NOT_PROVIDERS: list[str] = [ - "crypto", - "webhdfs", -] - -EXTRAS_DEPRECATED_ALIASES_IGNORED_FROM_REF_DOCS: list[str] = [ - "jira", -] - - -def add_extras_for_all_deprecated_aliases() -> None: - """ - Add extras for all deprecated aliases. - - Requirements for those deprecated aliases are the same as the extras they are replaced with. - The dependencies are not copies - those are the same lists as for the new extras. This is intended. - Thanks to that if the original extras are later extended with providers, aliases are extended as well. - """ - for alias, extra in EXTRAS_DEPRECATED_ALIASES.items(): - dependencies = EXTRAS_DEPENDENCIES.get(extra) if extra != "" else [] - if dependencies is not None: - EXTRAS_DEPENDENCIES[alias] = dependencies - - -def add_all_deprecated_provider_packages() -> None: - """ - For deprecated aliases that are providers, swap the providers dependencies to be the provider itself. - - e.g. {"kubernetes": ["kubernetes>=3.0.0, <12.0.0", ...]} becomes - {"kubernetes": ["apache-airflow-provider-cncf-kubernetes"]} - """ - for alias, provider in EXTRAS_DEPRECATED_ALIASES.items(): - if alias not in EXTRAS_DEPRECATED_ALIASES_NOT_PROVIDERS: - replace_extra_dependencies_with_provider_packages(alias, [provider]) - - -add_extras_for_all_deprecated_aliases() - -############################################################################################################# -# End of deprecated section -############################################################################################################# - -# This is list of all providers. It's a shortcut for anyone who would like to easily get list of -# All providers. It is used by pre-commits. -ALL_PROVIDERS = list(PROVIDER_DEPENDENCIES.keys()) - -ALL_DB_PROVIDERS = [ - "apache.cassandra", - "apache.drill", - "apache.druid", - "apache.hdfs", - "apache.hive", - "apache.impala", - "apache.pinot", - "arangodb", - "cloudant", - "databricks", - "exasol", - "influxdb", - "microsoft.mssql", - "mongo", - "mysql", - "neo4j", - "postgres", - "presto", - "trino", - "vertica", -] - - -def get_all_db_dependencies() -> list[str]: - _all_db_reqs: set[str] = set() - for provider in ALL_DB_PROVIDERS: - if provider in PROVIDER_DEPENDENCIES: - for req in PROVIDER_DEPENDENCIES[provider][DEPS]: - _all_db_reqs.add(req) - return list(_all_db_reqs) - - -# Special dependencies for all database-related providers. They are de-duplicated. -all_dbs = get_all_db_dependencies() - -# All db user extras here -EXTRAS_DEPENDENCIES["all_dbs"] = all_dbs - -# Requirements for all "user" extras (no devel). They are de-duplicated. 
Note that we do not need -# to separately add providers dependencies - they have been already added as 'providers' extras above -_all_dependencies = get_unique_dependency_list(EXTRAS_DEPENDENCIES.values()) - -_all_dependencies_without_airflow_providers = [k for k in _all_dependencies if "apache-airflow-" not in k] - -# All user extras here -# all is purely development extra and it should contain only direct dependencies of Airflow -# It should contain all dependencies of airflow and dependencies of all community providers, -# but not the providers themselves -EXTRAS_DEPENDENCIES["all"] = _all_dependencies_without_airflow_providers - -# This can be simplified to devel_hadoop + _all_dependencies due to inclusions -# but we keep it for explicit sake. We are de-duplicating it anyway. -devel_all = get_unique_dependency_list( - [_all_dependencies_without_airflow_providers, doc, doc_gen, devel, devel_hadoop] -) - -# Those are packages excluded for "all" dependencies -PACKAGES_EXCLUDED_FOR_ALL: list[str] = [] - - -def is_package_excluded(package: str, exclusion_list: list[str]) -> bool: - """ - Check if package should be excluded. - - :param package: package name (beginning of it) - :param exclusion_list: list of excluded packages - :return: true if package should be excluded - """ - return package.startswith(tuple(exclusion_list)) - - -def remove_provider_limits(package: str) -> str: - """ - Remove the limit for providers in devel_all to account for pre-release and development packages. - - :param package: package name (beginning of it) - :return: true if package should be excluded - """ - return ( - package.split(">=")[0] - if package.startswith("apache-airflow-providers") and ">=" in package - else package - ) - - -devel = [remove_provider_limits(package) for package in devel] -devel_all = [ - remove_provider_limits(package) - for package in devel_all - if not is_package_excluded(package=package, exclusion_list=PACKAGES_EXCLUDED_FOR_ALL) -] -devel_hadoop = [remove_provider_limits(package) for package in devel_hadoop] -devel_ci = devel_all - - -# Those are extras that we have to add for development purposes -# They can be use to install some predefined set of dependencies. -EXTRAS_DEPENDENCIES["doc"] = doc -EXTRAS_DEPENDENCIES["doc_gen"] = doc_gen -EXTRAS_DEPENDENCIES["devel"] = devel # devel already includes doc -EXTRAS_DEPENDENCIES["devel_hadoop"] = devel_hadoop # devel_hadoop already includes devel -EXTRAS_DEPENDENCIES["devel_all"] = devel_all -EXTRAS_DEPENDENCIES["devel_ci"] = devel_ci - - -def sort_extras_dependencies() -> dict[str, list[str]]: - """ - Sort dependencies; the dictionary order remains when keys() are retrieved. - - Sort both: extras and list of dependencies to make it easier to analyse problems - external packages will be first, then if providers are added they are added at the end of the lists. - """ - sorted_dependencies: dict[str, list[str]] = {} - sorted_extra_ids = sorted(EXTRAS_DEPENDENCIES.keys()) - for extra_id in sorted_extra_ids: - sorted_dependencies[extra_id] = sorted(EXTRAS_DEPENDENCIES[extra_id]) - return sorted_dependencies - - -EXTRAS_DEPENDENCIES = sort_extras_dependencies() - -# Those providers are pre-installed always when airflow is installed. 
-# TODO: Sync them with the ones in dev/breeze/src/airflow_breeze/util/packages.py -PREINSTALLED_PROVIDERS = [ - # Until we cut off the 2.8.0 branch and bump current airflow version to 2.9.0, we should - # Keep common.io commented out in order ot be able to generate PyPI constraints because - # The version from PyPI has requirement of apache-airflow>=2.8.0 - # "common.io", - "common.sql", - "ftp", - "http", - "imap", - "sqlite", -] - - -def get_provider_package_name_from_package_id(package_id: str) -> str: - """ - Build the name of provider package out of the package id provided. - - :param package_id: id of the package (like amazon or microsoft.azure) - :return: full name of package in PyPI - """ - version_spec = "" - if ">=" in package_id: - package, version = package_id.split(">=") - version_spec = f">={version}" - version_suffix = os.environ.get("VERSION_SUFFIX_FOR_PYPI") - if version_suffix: - version_spec += version_suffix - else: - package = package_id - package_suffix = package.replace(".", "-") - return f"apache-airflow-providers-{package_suffix}{version_spec}" - - -def get_excluded_providers() -> list[str]: - """Return packages excluded for the current python version.""" - return [] - - -def get_all_provider_packages() -> str: - """Return all provider packages configured in setup.py.""" - excluded_providers = get_excluded_providers() - return " ".join( - get_provider_package_name_from_package_id(package) - for package in ALL_PROVIDERS - if package not in excluded_providers - ) - - -class AirflowDistribution(Distribution): - """The setuptools.Distribution subclass with Airflow specific behaviour.""" - - def __init__(self, attrs=None): - super().__init__(attrs) - self.install_requires = None - - def parse_config_files(self, *args, **kwargs) -> None: - """ - When asked to install providers from sources, ensure we don't *also* try to install from PyPI. - - Also we should make sure that in this case we copy provider.yaml files so that - Providers manager can find package information. - """ - super().parse_config_files(*args, **kwargs) - if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true": - self.install_requires = [ - req for req in self.install_requires if not req.startswith("apache-airflow-providers-") - ] - provider_yaml_files = glob.glob("airflow/providers/**/provider.yaml", recursive=True) - for provider_yaml_file in provider_yaml_files: - provider_relative_path = os.path.relpath( - provider_yaml_file, str(AIRFLOW_SOURCES_ROOT / "airflow") - ) - self.package_data["airflow"].append(provider_relative_path) - # Add python_kubernetes_script.jinja2 to package data - self.package_data["airflow"].append("providers/cncf/kubernetes/python_kubernetes_script.jinja2") - # Add default email template to package data - self.package_data["airflow"].append("providers/smtp/notifications/templates/email.html") - else: - self.install_requires.extend( - [ - get_provider_package_name_from_package_id(package_id) - for package_id in PREINSTALLED_PROVIDERS - ] - ) - - -def replace_extra_dependencies_with_provider_packages(extra: str, providers: list[str]) -> None: - """ - Replace extra dependencies with provider package. - - The intention here is that when the provider is added as dependency of extra, there is no - need to add the dependencies separately. 
This is not needed and even harmful, because in - case of future versions of the provider, the dependencies might change, so hard-coding - dependencies from the version that was available at the release time might cause dependency - conflicts in the future. - - Say for example that you have salesforce provider with those deps: - - { 'salesforce': ['simple-salesforce>=1.0.0', 'tableauserverclient'] } - - Initially ['salesforce'] extra has those dependencies, and it works like that when you install - it when INSTALL_PROVIDERS_FROM_SOURCES is set to `true` (during the development). However, when - the production installation is used, The dependencies are changed: - - { 'salesforce': ['apache-airflow-providers-salesforce'] } - - And then, 'apache-airflow-providers-salesforce' package has those 'install_requires' dependencies: - ['simple-salesforce>=1.0.0', 'tableauserverclient'] - - So transitively 'salesforce' extra has all the dependencies it needs and in case the provider - changes its dependencies, they will transitively change as well. - - In the constraint mechanism we save both - provider versions and its dependencies - version, which means that installation using constraints is repeatable. - - For K8s and Celery which are both "Core executors" and "Providers" we have to - add the base dependencies to core as well, in order to mitigate problems where - newer version of provider will have less strict limits. This should be done for both - extras and their deprecated aliases. This is not a full protection however, the way - extras work, this will not add "hard" limits for Airflow and the user who does not use - constraints. - - :param extra: Name of the extra to add providers to - :param providers: list of provider ids - """ - if extra in ["cncf.kubernetes", "kubernetes", "celery"]: - EXTRAS_DEPENDENCIES[extra].extend( - [get_provider_package_name_from_package_id(package_name) for package_name in providers] - ) - elif extra == "apache.hive": - # We moved the hive macros to the hive provider, and they are available in hive provider only as of - # 5.1.0 version only, so we have to make sure minimum version is used - EXTRAS_DEPENDENCIES[extra] = ["apache-airflow-providers-apache-hive>=5.1.0"] - else: - EXTRAS_DEPENDENCIES[extra] = [ - get_provider_package_name_from_package_id(package_name) for package_name in providers - ] - - -def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str]) -> None: - """ - Add provider packages as dependencies to extra. - - This is used to add provider packages as dependencies to the "bulk" kind of extras. - Those bulk extras do not have the detailed 'extra' dependencies as initial values, - so instead of replacing them (see previous function) we can extend them. - - :param extra: Name of the extra to add providers to - :param providers: list of provider ids - """ - EXTRAS_DEPENDENCIES[extra].extend( - [get_provider_package_name_from_package_id(package_name) for package_name in providers] - ) - - -def add_all_provider_packages() -> None: - """ - Add extra dependencies when providers are installed from packages. - - In case of regular installation (providers installed from packages), we should add extra dependencies to - Airflow - to get the providers automatically installed when those extras are installed. - - For providers installed from sources we skip that step. 
-
-
-def add_provider_packages_to_extra_dependencies(extra: str, providers: list[str]) -> None:
-    """
-    Add provider packages as dependencies to extra.
-
-    This is used to add provider packages as dependencies to the "bulk" kind of extras.
-    Those bulk extras do not have the detailed 'extra' dependencies as initial values,
-    so instead of replacing them (see previous function) we can extend them.
-
-    :param extra: Name of the extra to add providers to
-    :param providers: list of provider ids
-    """
-    EXTRAS_DEPENDENCIES[extra].extend(
-        [get_provider_package_name_from_package_id(package_name) for package_name in providers]
-    )
-
-
-def add_all_provider_packages() -> None:
-    """
-    Add extra dependencies when providers are installed from packages.
-
-    In case of regular installation (providers installed from packages), we should add extra dependencies to
-    Airflow - to get the providers automatically installed when those extras are installed.
-
-    For providers installed from sources we skip that step. That helps to test and install airflow with
-    all packages in CI - for example when new providers are added, otherwise the installation would fail
-    as the new provider is not yet in PyPI.
-
-    """
-    for provider_id in ALL_PROVIDERS:
-        replace_extra_dependencies_with_provider_packages(provider_id, [provider_id])
-    add_provider_packages_to_extra_dependencies("all", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("devel_ci", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("devel_all", ALL_PROVIDERS)
-    add_provider_packages_to_extra_dependencies("all_dbs", ALL_DB_PROVIDERS)
-    add_provider_packages_to_extra_dependencies(
-        "devel_hadoop", ["apache.hdfs", "apache.hive", "presto", "trino"]
-    )
-    add_all_deprecated_provider_packages()
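Roughly, after the function above runs during a production install, the bulk extras point at provider packages; for example (names derived with get_provider_package_name_from_package_id, shown here only as an illustration):

add_all_provider_packages()
# The "devel_hadoop" extra now additionally contains:
#   "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive",
#   "apache-airflow-providers-presto", "apache-airflow-providers-trino"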
-
-
-class Develop(develop_orig):
-    """Forces removal of providers in editable mode."""
-
-    def run(self) -> None:  # type: ignore
-        self.announce("Installing in editable mode. Uninstalling provider packages!", level=log.INFO)
-        # We need to run "python3 -m pip" because it might be that an older PIP binary is in the path
-        # and it results in an error when running pip directly (cannot import pip module).
-        # Also PIP does not have a stable API so we have to run subprocesses ¯\_(ツ)_/¯
-        try:
-            installed_packages = (
-                subprocess.check_output(["python3", "-m", "pip", "freeze"]).decode().splitlines()
-            )
-            airflow_provider_packages = [
-                package_line.split("=")[0]
-                for package_line in installed_packages
-                if package_line.startswith("apache-airflow-providers")
-            ]
-            self.announce(f"Uninstalling {airflow_provider_packages}!", level=log.INFO)
-            subprocess.check_call(["python3", "-m", "pip", "uninstall", "--yes", *airflow_provider_packages])
-        except subprocess.CalledProcessError as e:
-            self.announce(f"Error when uninstalling airflow provider packages: {e}!", level=log.WARN)
-        super().run()
-
-
-class Install(install_orig):
-    """Standard (non-editable) installation - providers are installed from packages."""
-
-    def run(self) -> None:
-        self.announce("Standard installation. Providers are installed from packages", level=log.INFO)
-        super().run()
-
-
-def do_setup() -> None:
-    """
-    Perform the Airflow package setup.
-
-    Most values come from setup.cfg; only the dynamically calculated ones are passed to the setup
-    function call. See https://setuptools.readthedocs.io/en/latest/userguide/declarative_config.html
-    """
-    setup_kwargs = {}
-
-    def include_provider_namespace_packages_when_installing_from_sources() -> None:
-        """
-        When installing providers from sources we install all namespace packages found below airflow.
-
-        This includes airflow and provider packages; otherwise the defaults from setup.cfg control this.
-        The kwargs in the setup() call override those that are specified in setup.cfg.
-        """
-        if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true":
-            setup_kwargs["packages"] = find_namespace_packages(include=["airflow*"])
-
-    include_provider_namespace_packages_when_installing_from_sources()
-    if os.getenv(INSTALL_PROVIDERS_FROM_SOURCES) == "true":
-        print("Installing providers from sources. Skip adding providers as dependencies")
-    else:
-        add_all_provider_packages()
-
-    write_version()
-    setup(
-        distclass=AirflowDistribution,
-        extras_require=EXTRAS_DEPENDENCIES,
-        cmdclass={
-            "extra_clean": CleanCommand,
-            "compile_assets": CompileAssets,
-            "list_extras": ListExtras,
-            "install": Install,  # type: ignore
-            "develop": Develop,
-        },
-        test_suite="setup.airflow_test_suite",
-        **setup_kwargs,  # type: ignore
-    )
-
-
-if __name__ == "__main__":
-    do_setup()  # comment to trigger upgrade to newer dependencies when setup.py is changed
diff --git a/tests/providers/google/cloud/utils/base_gcp_mock.py b/tests/providers/google/cloud/utils/base_gcp_mock.py
index 0fbfb3a6c17a9..c4295a3191bed 100644
--- a/tests/providers/google/cloud/utils/base_gcp_mock.py
+++ b/tests/providers/google/cloud/utils/base_gcp_mock.py
@@ -32,7 +32,7 @@ def mock_base_gcp_hook_default_project_id(
     impersonation_chain=None,
     delegate_to=None,
 ):
-    self.extras_list = {"project": GCP_PROJECT_ID_HOOK_UNIT_TEST}
+    self.standard_extras_list = {"project": GCP_PROJECT_ID_HOOK_UNIT_TEST}
     self._conn = gcp_conn_id
     self.impersonation_chain = impersonation_chain
     self._client = None
@@ -48,7 +48,7 @@ def mock_base_gcp_hook_no_default_project_id(
     impersonation_chain=None,
     delegate_to=None,
 ):
-    self.extras_list = {}
+    self.standard_extras_list = {}
     self._conn = gcp_conn_id
     self.impersonation_chain = impersonation_chain
     self._client = None