diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f6e3701a8..dc0cfd055 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -292,6 +292,81 @@ jobs:
       AIRFLOW_CONN_AIRFLOW_DB: postgres://postgres:postgres@0.0.0.0:5432/postgres
       PYTHONPATH: /home/runner/work/astronomer-cosmos/astronomer-cosmos/:$PYTHONPATH
 
+  # Runs the basic Cosmos task group integration test with dbt-postgres and
+  # dbt-databricks pinned to 1.5.4 (see scripts/test/integration-dbt-1-5-4.sh).
+  Run-Integration-Tests-DBT-1-5-4:
+    needs: Authorize
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ "3.11" ]
+        airflow-version: [ "2.7" ]
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.head.sha || github.ref }}
+      - uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/pip
+            .nox
+          key: integration-dbt-1-5-4-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.airflow-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('cosmos/__init__.py') }}
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install packages and dependencies
+        run: |
+          python -m pip install hatch
+          hatch -e tests.py${{ matrix.python-version }}-${{ matrix.airflow-version }} run pip freeze
+
+      - name: Test Cosmos against Airflow ${{ matrix.airflow-version }}, Python ${{ matrix.python-version }} and dbt 1.5.4
+        run: |
+          hatch run tests.py${{ matrix.python-version }}-${{ matrix.airflow-version }}:test-integration-dbt-1-5-4
+        env:
+          AIRFLOW_HOME: /home/runner/work/astronomer-cosmos/astronomer-cosmos/
+          AIRFLOW_CONN_AIRFLOW_DB: postgres://postgres:postgres@0.0.0.0:5432/postgres
+          AIRFLOW__CORE__DAGBAG_IMPORT_TIMEOUT: "90.0"
+          PYTHONPATH: /home/runner/work/astronomer-cosmos/astronomer-cosmos/:$PYTHONPATH
+          AIRFLOW_CONN_DATABRICKS_DEFAULT: ${{ secrets.AIRFLOW_CONN_DATABRICKS_DEFAULT }}
+          DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
+          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+          DATABRICKS_WAREHOUSE_ID: ${{ secrets.DATABRICKS_WAREHOUSE_ID }}
+          DATABRICKS_CLUSTER_ID: ${{ secrets.DATABRICKS_CLUSTER_ID }}
+          COSMOS_CONN_POSTGRES_PASSWORD: ${{ secrets.COSMOS_CONN_POSTGRES_PASSWORD }}
+          POSTGRES_HOST: localhost
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: postgres
+          POSTGRES_SCHEMA: public
+          POSTGRES_PORT: "5432"
+
+      - name: Upload coverage to Github
+        uses: actions/upload-artifact@v2
+        with:
+          name: coverage-integration-dbt-1-5-4-test-${{ matrix.python-version }}-${{ matrix.airflow-version }}
+          path: .coverage
+
+    env:
+      AIRFLOW_HOME: /home/runner/work/astronomer-cosmos/astronomer-cosmos/
+      AIRFLOW_CONN_AIRFLOW_DB: postgres://postgres:postgres@0.0.0.0:5432/postgres
+      PYTHONPATH: /home/runner/work/astronomer-cosmos/astronomer-cosmos/:$PYTHONPATH
+
   Run-Performance-Tests:
     needs: Authorize
     runs-on: ubuntu-latest
diff --git a/cosmos/cache.py b/cosmos/cache.py
index 3c2086c7a..7d136a127 100644
--- a/cosmos/cache.py
+++ b/cosmos/cache.py
@@ -3,6 +3,7 @@
 import shutil
 from pathlib import Path
 
+import msgpack
 from airflow.models.dag import DAG
 from airflow.utils.task_group import TaskGroup
 
@@ -121,4 +122,21 @@ def _copy_partial_parse_to_project(partial_parse_filepath: Path, project_path: P
     source_manifest_filepath = partial_parse_filepath.parent / DBT_MANIFEST_FILE_NAME
     target_manifest_filepath = target_partial_parse_file.parent / DBT_MANIFEST_FILE_NAME
     shutil.copy(str(partial_parse_filepath), str(target_partial_parse_file))
+
+    # Update root_path in the partial parse file to point to the needed project directory. This is necessary because,
+    # on specific earlier versions of dbt-core such as 1.5.4 and 1.6.5, the commands fail to locate project files:
+    # the root_path stored in the partial parse file points them to a stale directory.
+    # This issue was not observed on the more recent dbt-core versions tested (1.5.8, 1.6.6, 1.7.0 and 1.8.0).
+    # PR dbt-labs/dbt-core#8762 is suspected to be the fix, and it appears to have been backported to later
+    # releases of 1.5.x and 1.6.x. Nevertheless, the modification below is applied to ensure that root_path is
+    # correctly set to the needed project directory and the feature is compatible across all dbt-core versions.
+    with target_partial_parse_file.open("rb") as f:
+        data = msgpack.unpack(f)
+    for node in data["nodes"].values():
+        if node.get("root_path"):
+            node["root_path"] = str(project_path)
+    with target_partial_parse_file.open("wb") as f:
+        packed = msgpack.packb(data)
+        f.write(packed)
+
     shutil.copy(str(source_manifest_filepath), str(target_manifest_filepath))
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 430993ff8..81a7084e4 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,10 +1,11 @@
-google-re2==1.1
 aenum
-sphinx
-pydata-sphinx-theme
-sphinx-autobuild
-sphinx-autoapi
 apache-airflow
 apache-airflow-providers-cncf-kubernetes>=5.1.1
+google-re2==1.1
+msgpack
 openlineage-airflow
 pydantic
+pydata-sphinx-theme
+sphinx
+sphinx-autoapi
+sphinx-autobuild
diff --git a/pyproject.toml b/pyproject.toml
index 5f0e5ee0e..f740f2071 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
     "apache-airflow>=2.3.0",
     "importlib-metadata; python_version < '3.8'",
     "Jinja2>=3.0.0",
+    "msgpack",
     "pydantic>=1.10.0",
     "typing-extensions; python_version < '3.8'",
     "virtualenv",
@@ -141,16 +142,17 @@ matrix.airflow.dependencies = [
 
 [tool.hatch.envs.tests.scripts]
 freeze = "pip freeze"
-type-check = "mypy cosmos"
 test = 'sh scripts/test/unit.sh'
 test-cov = 'sh scripts/test/unit-cov.sh'
-test-integration-setup = 'sh scripts/test/integration-setup.sh'
 test-integration = 'sh scripts/test/integration.sh'
+test-integration-dbt-1-5-4 = 'sh scripts/test/integration-dbt-1-5-4.sh'
 test-integration-expensive = 'sh scripts/test/integration-expensive.sh'
-test-integration-sqlite-setup = 'sh scripts/test/integration-sqlite-setup.sh'
+test-integration-setup = 'sh scripts/test/integration-setup.sh'
 test-integration-sqlite = 'sh scripts/test/integration-sqlite.sh'
-test-performance-setup = 'sh scripts/test/performance-setup.sh'
+test-integration-sqlite-setup = 'sh scripts/test/integration-sqlite-setup.sh'
 test-performance = 'sh scripts/test/performance.sh'
+test-performance-setup = 'sh scripts/test/performance-setup.sh'
+type-check = "mypy cosmos"
 
 [tool.pytest.ini_options]
 filterwarnings = ["ignore::DeprecationWarning"]
@@ -164,13 +166,14 @@ markers = ["integration", "sqlite", "perf"]
 [tool.hatch.envs.docs]
 dependencies = [
     "aenum",
-    "sphinx",
-    "pydata-sphinx-theme",
-    "sphinx-autobuild",
-    "sphinx-autoapi",
-    "openlineage-airflow",
     "apache-airflow-providers-cncf-kubernetes>=5.1.1",
+    "msgpack",
+    "openlineage-airflow",
     "pydantic>=1.10.0",
+    "pydata-sphinx-theme",
+    "sphinx",
+    "sphinx-autoapi",
+    "sphinx-autobuild",
 ]
 
 [tool.hatch.envs.docs.scripts]
diff --git a/scripts/test/integration-dbt-1-5-4.sh b/scripts/test/integration-dbt-1-5-4.sh
new file mode 100644
index 000000000..087533082
--- /dev/null
+++ b/scripts/test/integration-dbt-1-5-4.sh
@@ -0,0 +1,17 @@
+# Pin dbt to 1.5.4 and run only the basic Cosmos task group integration test.
+# Abort on the first failing command so that a failed install of the pinned
+# dbt version cannot be masked by tests running against another version.
+set -e
+
+pip uninstall dbt-adapters dbt-common dbt-core dbt-extractor dbt-postgres dbt-semantic-interfaces -y
+pip install dbt-postgres==1.5.4 dbt-databricks==1.5.4
+rm -rf airflow.*; \
+airflow db init; \
+pytest -vv \
+    --cov=cosmos \
+    --cov-report=term-missing \
+    --cov-report=xml \
+    --durations=0 \
+    -m integration \
+    --ignore=tests/perf \
+    -k 'basic_cosmos_task_group'