Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(ingestion): fix airflow 1 deps for tox #4083

Merged
merged 8 commits into from
Feb 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions metadata-ingestion/scripts/airflow1-constraints.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
#
# Regenerates tests/airflow1-constraints.txt, the pip constraints file used
# when testing against Airflow 1.x.
#
# Steps: install the regular dev dependencies and snapshot the environment,
# switch the environment over to Airflow 1.10.15 and snapshot it again, then
# record every pinned line that appears only in the second snapshot.
#
# NOTE(review): the pip commands act on whichever environment `pip` resolves to
# on PATH; presumably that is the virtualenv managed by `installDev` -- confirm
# before running.
set -euo pipefail

# Every path below is relative to the metadata-ingestion directory (the parent
# of scripts/), so switch there no matter where the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Comment header written at the top of the generated file. The delimiter is
# quoted so the text is taken literally (no parameter or command expansion).
HEADER=$(cat <<'EOF'
#
# This file helps pip resolve dependencies for Airflow 1.x in a reasonable amount
# of time during testing. Without these constraints, pip will spend hours
# backtracking in an attempt to find a compatible list of versions.
# See https://pip.pypa.io/en/latest/topics/dependency-resolution/#backtracking
# for some explanation of backtracing with the new behavior in pip 20.3+.
#
EOF
)

# Set up a clean virtualenv and install the dev dependencies.
../gradlew clean installDev

# Snapshot the pip environment with the regular dev dependencies.
pip freeze > requirements-dev.txt

# Install Airflow 1.10.15. pip uninstalls incompatible dependency versions and
# replaces them with compatible ones. One snag: the Airflow 2.x providers that
# were split into separate packages are not removed automatically, so remove
# them by hand first.
pip uninstall -y apache-airflow-providers-http apache-airflow-providers-snowflake
pip install -e '.[dev-airflow1-base]'

# Snapshot the pip environment again, now with Airflow 1.x installed.
pip freeze > requirements-dev-airflow1.txt

# Keep only the lines of the Airflow 1 snapshot that do not appear verbatim in
# the dev snapshot. Fixed-string, whole-line grep matching is independent of
# line order; `comm` would require both inputs to be sorted in the current
# locale's collation order, which raw `pip freeze` output is not guaranteed to
# satisfy, and a non-zero exit from comm would abort the script under `set -e`.
# grep exits with status 1 when it selects no lines; that only means the two
# snapshots are identical, so it is not treated as a failure. Any other
# non-zero status (a real error) still aborts the script.
grep -Fxv -f requirements-dev.txt requirements-dev-airflow1.txt \
    > airflow1-constraints-data.txt || [ $? -eq 1 ]

# Assemble the constraints file: timestamp line, comment header, then the pins.
{
    echo "# Generated by scripts/airflow1-constraints.sh on $(date)."
    echo "$HEADER"
    cat airflow1-constraints-data.txt
} > tests/airflow1-constraints.txt

# Cleanup: move the intermediate files out of the source tree.
mv requirements-dev.txt requirements-dev-airflow1.txt airflow1-constraints-data.txt /tmp
11 changes: 8 additions & 3 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,19 +241,23 @@ def get_long_description():
"apache-airflow[snowflake]>=2.0.2", # snowflake is used in example dags
"snowflake-sqlalchemy<=1.2.4", # make constraint consistent with extras
}
dev_requirements_airflow_1 = {
*base_dev_requirements,
dev_requirements_airflow_1_base = {
"apache-airflow==1.10.15",
"apache-airflow-backport-providers-snowflake",
"snowflake-sqlalchemy<=1.2.4", # make constraint consistent with extras
"WTForms==2.3.3", # make constraint consistent with extras
}
dev_requirements_airflow_1 = {
*base_dev_requirements,
*dev_requirements_airflow_1_base,
}

full_test_dev_requirements = {
*list(
dependency
for plugin in [
"athena",
# Only include Athena for Python 3.7 or newer.
*(["athena"] if is_py37_or_newer else []),
"druid",
"feast",
"hive",
Expand Down Expand Up @@ -395,6 +399,7 @@ def get_long_description():
)
),
"dev": list(dev_requirements),
"dev-airflow1-base": list(dev_requirements_airflow_1_base),
"dev-airflow1": list(dev_requirements_airflow_1),
"integration-tests": list(full_test_dev_requirements),
},
Expand Down
21 changes: 15 additions & 6 deletions metadata-ingestion/src/datahub/emitter/rest_emitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,21 @@ def __init__(
if retry_max_times:
self._retry_max_times = retry_max_times

retry_strategy = Retry(
total=self._retry_max_times,
status_forcelist=self._retry_status_codes,
backoff_factor=2,
allowed_methods=self._retry_methods,
)
try:
retry_strategy = Retry(
total=self._retry_max_times,
status_forcelist=self._retry_status_codes,
backoff_factor=2,
allowed_methods=self._retry_methods,
)
except TypeError:
# Prior to urllib3 1.26, the Retry class used `method_whitelist` instead of `allowed_methods`.
retry_strategy = Retry(
total=self._retry_max_times,
status_forcelist=self._retry_status_codes,
backoff_factor=2,
method_whitelist=self._retry_methods,
)

adapter = HTTPAdapter(
pool_connections=100, pool_maxsize=100, max_retries=retry_strategy
Expand Down
49 changes: 49 additions & 0 deletions metadata-ingestion/tests/airflow1-constraints.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Generated by scripts/airflow1-constraints.sh on Fri Feb 11 13:16:30 EST 2022.
#
# This file helps pip resolve dependencies for Airflow 1.x in a reasonable amount
# of time during testing. Without these constraints, pip will spend hours
# backtracking in an attempt to find a compatible list of versions.
# See https://pip.pypa.io/en/latest/topics/dependency-resolution/#backtracking
# for some explanation of backtracing with the new behavior in pip 20.3+.
#
apache-airflow==1.10.15
apache-airflow-backport-providers-snowflake==2021.3.13
apache-airflow-providers-ftp==2.0.1
apache-airflow-providers-imap==2.1.0
apache-airflow-providers-sqlite==2.0.1
apispec==1.3.3
cached-property==1.5.2
chardet==3.0.4
click==7.1.2
click-default-group==1.2.2
colorlog==4.0.2
configparser==3.5.3
croniter==0.3.37
Flask-Admin==1.5.4
Flask-AppBuilder==2.3.4
Flask-Babel==1.0.0
Flask-Caching==1.3.3
flask-swagger==0.2.14
Flask-WTF==0.14.3
funcsigs==1.0.2
idna==2.10
importlib-resources==1.5.0
Jinja2==2.11.3
json-merge-patch==0.2
lazy-object-proxy==1.4.3
Markdown==2.6.11
marshmallow==2.21.0
marshmallow-enum==1.5.1
marshmallow-oneofschema==3.0.1
marshmallow-sqlalchemy==0.23.1
natsort==8.1.0
pendulum==1.4.4
requests==2.23.0
requests-mock==1.9.3
requests-toolbelt==0.9.1
SQLAlchemy-JSONField==0.9.0
tenacity==4.12.0
tzlocal==1.5.1
urllib3==1.25.11
Werkzeug==0.16.1
zope.deprecation==4.4.0
25 changes: 10 additions & 15 deletions metadata-ingestion/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
# and then run "tox" from this directory.

[tox]
envlist = py3-quick,{py36,py39}-full,py3-airflow1
envlist = py3-quick,py3-full,py3-airflow1

[gh-actions]
python =
3.6: py36-full
3.9: py39-full
3.6: py3-full, py3-airflow1
3.9: py3-full, py3-airflow1

# Providing optional features that add dependencies from setup.py as deps here
# allows tox to recreate testenv when new dependencies are added to setup.py.
Expand All @@ -19,14 +19,11 @@ python =

[testenv]
deps =
-rtox_requirements/dev_requirements.txt
.[dev]
commands =
pytest --cov={envsitepackagesdir}/datahub --cov={envsitepackagesdir}/datahub_provider \
py3-quick: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \
py36-full: --cov-fail-under 68 --junit-xml=junit.full.xml \
--continue-on-collection-errors \
-vv \
py39-full: --cov-fail-under 70 --junit-xml=junit.full.xml \
py3-quick,py3-airflow1: -m 'not integration and not slow_integration' --junit-xml=junit.quick.xml \
py3-full: --cov-fail-under 65 --junit-xml=junit.full.xml \
--continue-on-collection-errors \
-vv

Expand All @@ -36,11 +33,9 @@ setenv =
[testenv:py3-airflow1]
deps =
.[dev-airflow1]
-c tests/airflow1-constraints.txt

[testenv:py39-full]
deps =
-rtox_requirements/py39-full_requirements.txt

[testenv:py36-full]
[testenv:py3-full]
deps =
-rtox_requirements/py36-full_requirements.txt
.[dev]
.[integration-tests]
75 changes: 0 additions & 75 deletions metadata-ingestion/tox_requirements/dev_requirements.txt

This file was deleted.

Loading