diff --git a/Dockerfile b/Dockerfile
index 50d58957e1cad..496fa20aa3d41 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -192,20 +192,23 @@ ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES}
 ARG INSTALL_PROVIDERS_FROM_SOURCES="false"
 ENV INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES}
 
-# Increase the value here to force reinstalling Apache Airflow pip dependencies
-ARG PIP_DEPENDENCIES_EPOCH_NUMBER="1"
-ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER}
-
 # Only copy install_airflow_from_latest_master.sh to not invalidate cache on other script changes
 COPY scripts/docker/install_airflow_from_latest_master.sh /scripts/docker/install_airflow_from_latest_master.sh
 # fix permission issue in Azure DevOps when running the script
 RUN chmod a+x /scripts/docker/install_airflow_from_latest_master.sh
 
+# By default we do not upgrade to latest dependencies
+ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
+ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
+
 # In case of Production build image segment we want to pre-install master version of airflow
 # dependencies from GitHub so that we do not have to always reinstall it from the scratch.
 # The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
 # are uninstalled, only dependencies remain
-RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
+# The cache is only used when "upgrade to newer dependencies" is not set, to automatically
+# account for removed dependencies (we do not install them in the first place)
+RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
+      ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
         /scripts/docker/install_airflow_from_latest_master.sh; \
     fi
 
@@ -255,13 +258,10 @@ ENV INSTALL_FROM_DOCKER_CONTEXT_FILES=${INSTALL_FROM_DOCKER_CONTEXT_FILES}
 ARG INSTALL_FROM_PYPI="true"
 ENV INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI}
 
-# By default we do not upgrade to latest dependencies
-ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
-ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
-
 # Those are additional constraints that are needed for some extras but we do not want to
 # Force them on the main Airflow package.
 # * urllib3 - required to keep boto3 happy
+# * chardet<4 - required to keep snowflake happy
 ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="urllib3<1.26 chardet<4"
 
 WORKDIR /opt/airflow
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 672ab6793d090..c0ad778942145 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -286,22 +286,23 @@ RUN echo "Pip no cache dir: ${PIP_NO_CACHE_DIR}"
 
 RUN pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
 
-# Increase the value here to force reinstalling Apache Airflow pip dependencies
-ARG PIP_DEPENDENCIES_EPOCH_NUMBER="5"
-ENV PIP_DEPENDENCIES_EPOCH_NUMBER=${PIP_DEPENDENCIES_EPOCH_NUMBER}
-
 # Only copy install_airflow_from_latest_master.sh to not invalidate cache on other script changes
 COPY scripts/docker/install_airflow_from_latest_master.sh /scripts/docker/install_airflow_from_latest_master.sh
 # fix permission issue in Azure DevOps when running the script
 RUN chmod a+x /scripts/docker/install_airflow_from_latest_master.sh
 
+ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
+ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
+
 # In case of CI builds we want to pre-install master version of airflow dependencies so that
 # We do not have to always reinstall it from the scratch.
-# This can be reinstalled from latest master by increasing PIP_DEPENDENCIES_EPOCH_NUMBER.
 # And is automatically reinstalled from the scratch every time patch release of python gets released
 # The Airflow (and providers in case INSTALL_PROVIDERS_FROM_SOURCES is "false")
-# are uninstalled, only dependencies remain
-RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
+# are uninstalled, only dependencies remain.
+# The cache is only used when "upgrade to newer dependencies" is not set, to automatically
+# account for removed dependencies (we do not install them in the first place)
+RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" && \
+      ${UPGRADE_TO_NEWER_DEPENDENCIES} == "false" ]]; then \
         /scripts/docker/install_airflow_from_latest_master.sh; \
     fi
 
@@ -331,18 +332,13 @@ COPY setup.cfg ${AIRFLOW_SOURCES}/setup.cfg
 
 COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py
 
-ARG UPGRADE_TO_NEWER_DEPENDENCIES="false"
-ENV UPGRADE_TO_NEWER_DEPENDENCIES=${UPGRADE_TO_NEWER_DEPENDENCIES}
-
 # Those are additional constraints that are needed for some extras but we do not want to
 # Force them on the main Airflow package. Those limitations are:
-# * chardet,<4: required by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
+# * chardet,<4: required by snowflake provider
 # * lazy-object-proxy<1.5.0: required by astroid
 # * pyOpenSSL: Imposed by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
-# * requests>=2.20.0,<2.24.0: required by snowflake provider https://github.com/snowflakedb/snowflake-connector-python/blob/v2.3.6/setup.py#L201
-# * urllib3<1.26: Required to keep boto3 and snowflake happy
-
-ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 requests>=2.20.0,<2.24.0 urllib3<1.26"
+# * urllib3<1.26: Required to keep boto3 happy
+ARG EAGER_UPGRADE_ADDITIONAL_REQUIREMENTS="chardet<4 lazy-object-proxy<1.5.0 pyOpenSSL<20.0.0 urllib3<1.26"
 
 ARG CONTINUE_ON_PIP_CHECK_FAILURE="false"
 
diff --git a/IMAGES.rst b/IMAGES.rst
index cd72a0606d7bd..2de6e995d8c32 100644
--- a/IMAGES.rst
+++ b/IMAGES.rst
@@ -401,9 +401,6 @@ The following build arguments (``--build-arg`` in docker build command) can be u
 +------------------------------------------+------------------------------------------+------------------------------------------+
 | ``AIRFLOW_SOURCES``                      | ``/opt/airflow``                         | Mounted sources of Airflow               |
 +------------------------------------------+------------------------------------------+------------------------------------------+
-| ``PIP_DEPENDENCIES_EPOCH_NUMBER``        | ``3``                                    | increasing that number will reinstall    |
-|                                          |                                          | all PIP dependencies                     |
-+------------------------------------------+------------------------------------------+------------------------------------------+
 | ``CASS_DRIVER_NO_CYTHON``                | ``1``                                    | if set to 1 no CYTHON compilation is     |
 |                                          |                                          | done for cassandra driver (much faster)  |
 +------------------------------------------+------------------------------------------+------------------------------------------+
diff --git a/scripts/in_container/_in_container_utils.sh b/scripts/in_container/_in_container_utils.sh
index cd3dbf55586cd..a811e21cfa1b7 100644
--- a/scripts/in_container/_in_container_utils.sh
+++ b/scripts/in_container/_in_container_utils.sh
@@ -302,8 +302,22 @@ function install_airflow_from_sdist() {
     pip install "${airflow_package}${1}" >"${OUTPUT_PRINTED_ONLY_ON_ERROR}" 2>&1
 }
 
+function reinstall_azure_storage_blob() {
+    group_start "Reinstalls azure-storage-blob (temporary workaround)"
+    # Reinstall azure-storage-blob here until https://github.com/apache/airflow/pull/12188 is solved
+    # Azure-storage-blob needs to be reinstalled to overwrite azure-storage-blob installed by old version
+    # of the `azure-storage` library
+    echo
+    echo "Reinstalling azure-storage-blob"
+    echo
+    pip install azure-storage-blob --no-deps --force-reinstall
+    group_end
+}
+
 function install_remaining_dependencies() {
+    group_start "Installs all remaining dependencies that are not installed by '${AIRFLOW_EXTRAS}' "
     pip install apache-beam[gcp] >"${OUTPUT_PRINTED_ONLY_ON_ERROR}" 2>&1
+    group_end
 }
 
 function uninstall_airflow() {
diff --git a/scripts/in_container/run_ci_tests.sh b/scripts/in_container/run_ci_tests.sh
index c5348f7103a22..94a78791492cc 100755
--- a/scripts/in_container/run_ci_tests.sh
+++ b/scripts/in_container/run_ci_tests.sh
@@ -18,6 +18,8 @@
 # shellcheck source=scripts/in_container/_in_container_script_init.sh
 . "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh"
 
+reinstall_azure_storage_blob
+
 echo
 echo "Starting the tests with those pytest arguments:" "${@}"
 echo
diff --git a/scripts/in_container/run_install_and_test_provider_packages.sh b/scripts/in_container/run_install_and_test_provider_packages.sh
index ef2227c627cfe..609d00418af56 100755
--- a/scripts/in_container/run_install_and_test_provider_packages.sh
+++ b/scripts/in_container/run_install_and_test_provider_packages.sh
@@ -75,12 +75,6 @@ function install_airflow_as_specified() {
     group_end
 }
 
-function install_deps() {
-    group_start "Installs all remaining dependencies that are not installed by '${AIRFLOW_EXTRAS}' "
-    install_remaining_dependencies
-    group_end
-}
-
 function install_provider_packages() {
     group_start "Install provider packages"
     if [[ ${PACKAGE_FORMAT} == "wheel" ]]; then
@@ -202,7 +196,8 @@ function discover_all_field_behaviours() {
 
 setup_provider_packages
 verify_parameters
 install_airflow_as_specified
-install_deps
+install_remaining_dependencies
+reinstall_azure_storage_blob
 install_provider_packages
 import_all_provider_classes
diff --git a/scripts/in_container/run_prepare_provider_readme.sh b/scripts/in_container/run_prepare_provider_readme.sh
index d98de3100dd1a..69c42feefbdfa 100755
--- a/scripts/in_container/run_prepare_provider_readme.sh
+++ b/scripts/in_container/run_prepare_provider_readme.sh
@@ -30,10 +30,8 @@ verify_suffix_versions_for_package_preparation
 
 pip install --upgrade "pip==${AIRFLOW_PIP_VERSION}"
 
 # TODO: remove it when devel_all == devel_ci
-echo
-echo "Installing remaining packages from 'all' extras"
-echo
-pip install ".[devel_all]"
+install_remaining_dependencies
+reinstall_azure_storage_blob
 
 cd "${AIRFLOW_SOURCES}/provider_packages" || exit 1
diff --git a/setup.py b/setup.py
index 46a21288bbaf4..c571068febdbc 100644
--- a/setup.py
+++ b/setup.py
@@ -429,14 +429,10 @@ def get_sphinx_theme_version() -> str:
     # once it is merged, we can move those two back to `azure` extra.
     'azure-storage-blob',
     'azure-storage-common',
-    # snowflake is not compatible with latest version.
-    # This library monkey patches the requests library, so SSL is broken globally.
-    # See: https://github.com/snowflakedb/snowflake-connector-python/issues/324
-    'requests<2.24.0',
-    # Newest version drop support for old version of azure-storage-blob
-    # Until #12188 is solved at least we need to limit maximum version.
-    # https://github.com/apache/airflow/pull/12188
-    'snowflake-connector-python>=1.5.2,<=2.3.6',
+    # Snowflake connector >= 2.3.8 is needed because it has vendored urllib3 and requests libraries, which
+    # are monkey-patched. In earlier versions of the library, the monkey-patching done by snowflake
+    # caused other providers to fail (Google, Amazon, etc.)
+    'snowflake-connector-python>=2.3.8',
     'snowflake-sqlalchemy>=1.1.0',
 ]
 spark = [
@@ -473,11 +469,6 @@ def get_sphinx_theme_version() -> str:
 ]
 # End dependencies group
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# IF you are removing dependencies from this list, please make sure that you also increase
-# PIP_DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci and Dockerfile
-############################################################################################################
 devel = [
     'beautifulsoup4~=4.7.1',
     'black',
@@ -522,26 +513,9 @@ def get_sphinx_theme_version() -> str:
     'yamllint',
 ]
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# If you are removing dependencies from the above list, please make sure that you also increase
-# PIP_DEPENDENCIES_EPOCH_NUMBER in the Dockerfile.ci and Dockerfile
-############################################################################################################
-
 devel_minreq = cgroups + devel + doc + kubernetes + mysql + password
 devel_hadoop = devel_minreq + hdfs + hive + kerberos + presto + webhdfs
 
-############################################################################################################
-# IMPORTANT NOTE!!!!!!!!!!!!!!!
-# If you have a 'pip check' problem with dependencies, it might be because some dependency has been
-# installed via 'install_requires' in setup.cfg in higher version than required in one of the options below.
-# For example pip check was failing with requests=2.25.1 installed even if in some dependencies below
-# < 2.24.0 was specified for it. Solution in such case is to add such limiting requirement to
-# install_requires in setup.cfg (we've added requests<2.24.0 there to limit requests library).
-# This should be done with appropriate comment explaining why the requirement was added.
-############################################################################################################
-
-
 # Dict of all providers which are part of the Apache Airflow repository together with their requirements
 PROVIDERS_REQUIREMENTS: Dict[str, List[str]] = {
     'amazon': amazon,
@@ -641,7 +615,6 @@ def add_extras_for_all_providers() -> None:
 
 add_extras_for_all_providers()
 
-#############################################################################################################
 #############################################################################################################
 # The whole section can be removed in Airflow 3.0 as those old aliases are deprecated in 2.* series
 #############################################################################################################
@@ -750,10 +723,9 @@ def add_extras_for_all_deprecated_aliases() -> None:
 
 # Those packages are excluded because they break tests (downgrading mock and few other requirements)
 # and they are not needed to run our test suite. This can be removed as soon as we get non-conflicting
-# requirements for the apache-beam as well. This waits for azure + snowflake fixes:
+# requirements for the apache-beam as well. This waits for azure fixes:
 #
 # * Azure: https://github.com/apache/airflow/issues/11968
-# * Snowflake: https://github.com/apache/airflow/issues/12881
 #
 PACKAGES_EXCLUDED_FOR_CI = [
     'apache-beam',
diff --git a/tests/providers/presto/hooks/test_presto.py b/tests/providers/presto/hooks/test_presto.py
index a729d6fc5b75e..02d2a62a88d72 100644
--- a/tests/providers/presto/hooks/test_presto.py
+++ b/tests/providers/presto/hooks/test_presto.py
@@ -217,23 +217,6 @@ def test_should_record_records(self):
         records = hook.get_records(sql)
         self.assertEqual([['Customer#000000001'], ['Customer#000000002'], ['Customer#000000003']], records)
 
-    @pytest.mark.xfail(
-        condition=True,
-        reason="""
-This test will fail when full suite of tests are run, because of Snowflake monkeypatching urllib3
-library as described in https://github.com/snowflakedb/snowflake-connector-python/issues/324
-the offending code is here:
-https://github.com/snowflakedb/snowflake-connector-python/blob/133d6215f7920d304c5f2d466bae38127c1b836d/src/snowflake/connector/network.py#L89-L92
-
-This test however runs fine when run in total isolation. We could move it to Heisentests, but then we would
-have to enable integrations there and make sure no snowflake gets imported.
-
-In the future Snowflake plans to get rid of the MonkeyPatching.
-
-Issue to keep track of it: https://github.com/apache/airflow/issues/12881
-
-""",
-    )
     @pytest.mark.integration("presto")
     @pytest.mark.integration("kerberos")
     def test_should_record_records_with_kerberos_auth(self):