diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8bbafa1efefd6..2025e2e0806a6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -239,7 +239,7 @@ repos:
         files: Dockerfile.*$
         pass_filenames: true
       - id: setup-order
-        name: Checks for an order of dependencies in setup.py
+        name: Check order of dependencies in setup.py
        language: python
        files: ^setup.py$
        pass_filenames: false
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 3964789202de0..0987d21a8343a 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -543,13 +543,13 @@ This is the full list of those extras:

 all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
 apache.hive, apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop,
 apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes,
-dask, databricks, datadog, devel, devel_hadoop, dingding, discord, doc, docker, druid,
-elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc,
-hashicorp, hdfs, hive, http, imap, jdbc, jenkins, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, openfaas, opsgenie, oracle, pagerduty,
-papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
-samba, segment, sendgrid, sentry, sftp, singularity, slack, snowflake, spark, sqlite, ssh, statsd,
-tableau, vertica, virtualenv, webhdfs, winrm, yandex, yandexcloud, zendesk, all, devel_ci
+dask, databricks, datadog, dingding, discord, docker, druid, elasticsearch, exasol, facebook, ftp,
+gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, jdbc,
+jenkins, jira, kerberos, kubernetes, ldap, microsoft.azure, microsoft.mssql, microsoft.winrm, mongo,
+mssql, mysql, odbc, openfaas, opsgenie, oracle, pagerduty, papermill, password, pinot, plexus,
+postgres, presto, qds, qubole, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry, sftp,
+singularity, slack, snowflake, spark, sqlite, ssh, statsd, tableau, vertica, virtualenv, webhdfs,
+winrm, yandex, yandexcloud, zendesk, all, devel, devel_hadoop, doc, devel_all, devel_ci

 .. END EXTRAS HERE
diff --git a/INSTALL b/INSTALL
index c22d0507fefe1..cb04ede190dc4 100644
--- a/INSTALL
+++ b/INSTALL
@@ -66,13 +66,13 @@ pip install -e . \

 all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
 apache.hive, apache.kylin, apache.livy, apache.pig, apache.pinot, apache.spark, apache.sqoop,
 apache.webhdfs, async, atlas, aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes,
-dask, databricks, datadog, devel, devel_hadoop, dingding, discord, doc, docker, druid,
-elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc,
-hashicorp, hdfs, hive, http, imap, jdbc, jenkins, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, openfaas, opsgenie, oracle, pagerduty,
-papermill, password, pinot, plexus, postgres, presto, qds, qubole, rabbitmq, redis, salesforce,
-samba, segment, sendgrid, sentry, sftp, singularity, slack, snowflake, spark, sqlite, ssh, statsd,
-tableau, vertica, virtualenv, webhdfs, winrm, yandex, yandexcloud, zendesk, all, devel_ci
+dask, databricks, datadog, dingding, discord, docker, druid, elasticsearch, exasol, facebook, ftp,
+gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, jdbc,
+jenkins, jira, kerberos, kubernetes, ldap, microsoft.azure, microsoft.mssql, microsoft.winrm, mongo,
+mssql, mysql, odbc, openfaas, opsgenie, oracle, pagerduty, papermill, password, pinot, plexus,
+postgres, presto, qds, qubole, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry, sftp,
+singularity, slack, snowflake, spark, sqlite, ssh, statsd, tableau, vertica, virtualenv, webhdfs,
+winrm, yandex, yandexcloud, zendesk, all, devel, devel_hadoop, doc, devel_all, devel_ci

 # END EXTRAS HERE
diff --git a/UPDATING.md b/UPDATING.md
index 007ce490c2f31..81f735156725f 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -52,6 +52,13 @@ assists users migrating to a new version.

 ## Master

+### Rename `all` to `devel_all` extra
+
+The `all` extra was reduced to include only user-facing dependencies. This means
+that this extra no longer contains development dependencies. If you were relying
+on the `all` extra, you should now use `devel_all` instead, or work out whether
+you need the development extras at all.
+
 ### `[scheduler] max_threads` config has been renamed to `[scheduler] parsing_processes`

 From Airflow 2.0, `max_threads` config under `[scheduler]` section has been renamed to `parsing_processes`.
diff --git a/UPGRADING_TO_2.0.md b/UPGRADING_TO_2.0.md
index dc9446016ae54..b21c81ee4ef74 100644
--- a/UPGRADING_TO_2.0.md
+++ b/UPGRADING_TO_2.0.md
@@ -29,10 +29,9 @@ assists users migrating to a new version.
 - [Step 1: Upgrade to Python 3](#step-1-upgrade-to-python-3)
 - [Step 2: Upgrade to Airflow 1.10.13 (a.k.a our "bridge" release)](#step-2-upgrade-to-airflow-11013-aka-our-bridge-release)
 - [Step 3: Set Operators to Backport Providers](#step-3-set-operators-to-backport-providers)
-- [Step 3: Upgrade Airflow DAGs](#step-3-upgrade-airflow-dags)
+- [Step 4: Upgrade Airflow DAGs](#step-4-upgrade-airflow-dags)
   - [Change to undefined variable handling in templates](#change-to-undefined-variable-handling-in-templates)
   - [Changes to the KubernetesPodOperator](#changes-to-the-kubernetespodoperator)
-- [Step 4: Update system configurations](#step-4-update-system-configurations)
   - [Change default value for dag_run_conf_overrides_params](#change-default-value-for-dag_run_conf_overrides_params)
   - [DAG discovery safe mode is now case insensitive](#dag-discovery-safe-mode-is-now-case-insensitive)
   - [Change to Permissions](#change-to-permissions)
@@ -47,6 +46,7 @@ assists users migrating to a new version.
   - [Changes to Exception handling for from DAG callbacks](#changes-to-exception-handling-for-from-dag-callbacks)
   - [Airflow CLI changes in 2.0](#airflow-cli-changes-in-20)
   - [Changes to Airflow Plugins](#changes-to-airflow-plugins)
+  - [Changes to extras names](#changes-to-extras-names)
 - [Support for Airflow 1.10.x releases](#support-for-airflow-110x-releases)


@@ -131,7 +131,7 @@ pip install airflow[docker]
 automatically installs the `apache-airflow-providers-docker` package. But you can manage/upgrade
 remove provider packages separately from the airflow core.

-## Step 3: Upgrade Airflow DAGs
+## Step 4: Upgrade Airflow DAGs

 ### Change to undefined variable handling in templates

@@ -245,7 +245,6 @@ Kubernetes secrets into workers.

 For a more detailed list of changes to the KubernetesPodOperator API, please read
 [here](#Changed-Parameters-for-the-KubernetesPodOperator)

-## Step 4: Update system configurations

 ### Change default value for dag_run_conf_overrides_params

@@ -1076,6 +1075,18 @@ class AirflowTestPlugin(AirflowPlugin):
     appbuilder_menu_items = [appbuilder_mitem]
 ```

+### Changes to extras names
+
+The `all` extra was reduced to include only user-facing dependencies. This means
+that this extra no longer contains development dependencies. If you were using it
+and depending on the development packages, you should use `devel_all` instead.
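+
+For example, to keep the development dependencies that `all` used to pull in, install:
+
+```bash
+pip install 'apache-airflow[devel_all]'
+```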
+
 ### Support for Airflow 1.10.x releases

 As mentioned earlier in Step 2, the 1.10.13 release is intended to be a "bridge release"
diff --git a/dev/provider_packages/README.md b/dev/provider_packages/README.md
index fa1ce8713a385..66149119e2011 100644
--- a/dev/provider_packages/README.md
+++ b/dev/provider_packages/README.md
@@ -223,7 +223,7 @@ export BACKPORT_PACKAGES="true"

 ```shell script
 cd /airflow_sources
-pip install ".[all]"
+pip install ".[devel_all]"

 pip install "apache-airflow==${INSTALL_AIRFLOW_VERSION}"

diff --git a/docs/extra-packages-ref.rst b/docs/extra-packages-ref.rst
index 5f500bfaf5eda..997701f88f1e8 100644
--- a/docs/extra-packages-ref.rst
+++ b/docs/extra-packages-ref.rst
@@ -26,15 +26,19 @@ Here's the list of the :ref:`subpackages ` and what
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | subpackage          | install command                                     | enables                                                              |
 +=====================+=====================================================+======================================================================+
-| all                 | ``pip install 'apache-airflow[all]'``               | All Airflow features known to man                                    |
+| all                 | ``pip install 'apache-airflow[all]'``               | All Airflow user-facing features (no devel or doc requirements)      |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | all_dbs             | ``pip install 'apache-airflow[all_dbs]'``           | All databases integrations                                           |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| devel               | ``pip install 'apache-airflow[devel]'``             | Minimum dev tools requirements                                       |
+| devel               | ``pip install 'apache-airflow[devel]'``             | Minimum dev tools requirements (without ``all``)                     |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| devel_hadoop        | ``pip install 'apache-airflow[devel_hadoop]'``      | Airflow + dependencies on the Hadoop stack                           |
+| devel_hadoop        | ``pip install 'apache-airflow[devel_hadoop]'``      | Same as ``devel`` + dependencies for developing the Hadoop stack     |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| doc                 | ``pip install 'apache-airflow[doc]'``               | Packages needed to build docs                                        |
+| devel_all           | ``pip install 'apache-airflow[devel_all]'``         | Everything needed for development (``devel_hadoop`` + ``all``)       |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| devel_ci            | ``pip install 'apache-airflow[devel_ci]'``          | All dependencies required for CI build                               |
| ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| doc | ``pip install 'apache-airflow[doc]'`` | Packages needed to build docs (included in ``devel``) | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | password | ``pip install 'apache-airflow[password]'`` | Password authentication for users | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py index d8d9bf2780cd4..4dee47c5d6aab 100755 --- a/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py +++ b/scripts/ci/pre_commit/pre_commit_check_setup_extra_packages_ref.py @@ -45,7 +45,7 @@ def get_file_content(*path_elements: str) -> str: def get_extras_from_setup() -> Dict[str, List[str]]: """ Returns an array EXTRAS_REQUIREMENTS with aliases from setup.py file in format: - {'package name': ['alias1', 'alias2], ...} + {'package name': ['alias1', 'alias2'], ...} """ setup_content = get_file_content(SETUP_PY_FILE) @@ -53,7 +53,7 @@ def get_extras_from_setup() -> Dict[str, List[str]]: extras_section = extras_section_regex.findall(setup_content)[0] extras_regex = re.compile( - rf'^\s+[\"\']({PY_IDENTIFIER})[\"\']:\s*({PY_IDENTIFIER}|\[\])[^#\n]*(#\s*TODO.*)?$', re.MULTILINE + rf'^\s*[\"\']({PY_IDENTIFIER})[\"\']:\s*({PY_IDENTIFIER}|\[\])[^#\n]*(#\s*.*)?$', re.MULTILINE ) extras_dict: Dict[str, List[str]] = {} @@ -66,6 +66,15 @@ def get_extras_from_setup() -> Dict[str, List[str]]: if not extras_dict.get(package): extras_dict[package] = [] extras_dict[package].append(alias) + + updates_sections_regex = re.compile(r"^EXTRAS_REQUIREMENTS\.update[^{]+{([^}]+)}", re.MULTILINE) + updates_sections = updates_sections_regex.findall(setup_content) + for update in updates_sections: + for extra in extras_regex.findall(update): + package = extra[0] + if not extras_dict.get(package): + extras_dict[package] = [extra[0]] + return extras_dict @@ -80,7 +89,6 @@ def get_extras_from_docs() -> List[str]: ) extras = extras_section_regex.findall(docs_content) - extras = list(filter(lambda entry: entry != 'all', extras)) return extras diff --git a/scripts/in_container/run_prepare_provider_readme.sh b/scripts/in_container/run_prepare_provider_readme.sh index 38a3af271051d..197fefffe7ecb 100755 --- a/scripts/in_container/run_prepare_provider_readme.sh +++ b/scripts/in_container/run_prepare_provider_readme.sh @@ -32,7 +32,7 @@ verify_suffix_versions_for_package_preparation echo echo "Installing remaining packages from 'all' extras" echo -pip install -e ".[all]" >>"${OUT_FILE_PRINTED_ON_ERROR}" 2>&1 +pip install -e ".[devel_all]" >>"${OUT_FILE_PRINTED_ON_ERROR}" 2>&1 echo > "${OUT_FILE_PRINTED_ON_ERROR}" diff --git a/scripts/in_container/run_test_package_import_all_classes.sh b/scripts/in_container/run_test_package_import_all_classes.sh index 81d0000be209f..3d760281948b6 100755 --- a/scripts/in_container/run_test_package_import_all_classes.sh +++ b/scripts/in_container/run_test_package_import_all_classes.sh @@ -46,7 +46,7 @@ else echo echo "Installing remaining packages from 'all' extras" echo - pip install ".[all]" >>"${OUT_FILE_PRINTED_ON_ERROR}" 2>&1 + pip install ".[devel_all]" >>"${OUT_FILE_PRINTED_ON_ERROR}" 2>&1 
     echo
     echo "Uninstalling airflow after that"
     echo
diff --git a/setup.py b/setup.py
index eae499ba86d3b..190297cb9c68a 100644
--- a/setup.py
+++ b/setup.py
@@ -589,11 +589,8 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
     'dask': dask,
     'databricks': databricks,
     'datadog': datadog,
-    'devel': devel_minreq,
-    'devel_hadoop': devel_hadoop,
     'dingding': [],
     'discord': [],
-    'doc': doc,
     'docker': docker,
     'druid': druid,  # TODO: remove this in Airflow 2.1
     'elasticsearch': elasticsearch,
@@ -663,9 +660,10 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version

 EXTRAS_PROVIDERS_PACKAGES: Dict[str, Iterable[str]] = {
     'all': list(PROVIDERS_REQUIREMENTS.keys()),
-    # this is not 100% accurate with devel_ci definition, but we really want to have all providers
-    # when devel_ci extra is installed!
+    # this is not 100% accurate with the devel_ci and devel_all definitions, but we really want
+    # to have all providers when the devel_ci or devel_all extras are installed!
     'devel_ci': list(PROVIDERS_REQUIREMENTS.keys()),
+    'devel_all': list(PROVIDERS_REQUIREMENTS.keys()),
     'all_dbs': [
         "apache.cassandra",
         "apache.druid",
@@ -779,15 +777,24 @@ def write_version(filename: str = os.path.join(*[my_dir, "airflow", "git_version
     'zendesk': ["zendesk"],
 }

-
-# Make devel_all contain all providers + extras + unique
-devel_all = list(
+# All user-facing extras (no devel extras)
+all_ = list(
     set(
-        devel
-        + [req for req_list in EXTRAS_REQUIREMENTS.values() for req in req_list]
+        [req for req_list in EXTRAS_REQUIREMENTS.values() for req in req_list]
         + [req for req_list in PROVIDERS_REQUIREMENTS.values() for req in req_list]
     )
 )
+EXTRAS_REQUIREMENTS.update(
+    {
+        'all': all_,
+        'devel': devel_minreq,  # includes doc
+        'devel_hadoop': devel_hadoop,  # includes devel_minreq
+        'doc': doc,
+    }
+)
+# This could be simplified to devel_hadoop + all_ due to the inclusions above,
+# but we keep it explicit for clarity
+devel_all = list(set(all_ + doc + devel_minreq + devel_hadoop))

 PACKAGES_EXCLUDED_FOR_ALL = []

@@ -821,6 +828,7 @@ def is_package_excluded(package: str, exclusion_list: List[str]):
     for package in devel_all
     if not is_package_excluded(package=package, exclusion_list=PACKAGES_EXCLUDED_FOR_ALL)
 ]
+
 devel_ci = [
     package
     for package in devel_all
@@ -831,7 +839,7 @@ def is_package_excluded(package: str, exclusion_list: List[str]):

 EXTRAS_REQUIREMENTS.update(
     {
-        'all': devel_all,
+        'devel_all': devel_all,
         'devel_ci': devel_ci,
     }
 )