diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 72c3825383..359ed43095 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -116,7 +116,7 @@ jobs: shell: cmd - name: Install pyarrow - run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk + run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk && poetry run pip install pyarrow==15.0.2 - run: | poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow @@ -129,7 +129,7 @@ jobs: shell: cmd - name: Install pipeline and sources dependencies - run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources + run: poetry install --no-interaction -E duckdb -E cli -E parquet -E deltalake -E sql_database --with sentry-sdk,pipeline,sources && poetry run pip install pyarrow==15.0.2 - run: | poetry run pytest tests/extract tests/pipeline tests/libs tests/cli/common tests/destinations tests/sources @@ -155,6 +155,20 @@ jobs: name: Run extract tests Windows shell: cmd + # here we upgrade pyarrow to 17 and run the libs tests again + - name: Install pyarrow 17 + run: poetry run pip install pyarrow==17.0.0 + + - run: | + poetry run pytest tests/libs + if: runner.os != 'Windows' + name: Run libs tests Linux/MAC + - run: | + poetry run pytest tests/libs + if: runner.os == 'Windows' + name: Run libs tests Windows + shell: cmd + # - name: Install Pydantic 1.0 # run: pip install "pydantic<2" diff --git a/.github/workflows/test_pyarrow17.yml b/.github/workflows/test_pyarrow17.yml deleted file mode 100644 index 941469bd4e..0000000000 --- a/.github/workflows/test_pyarrow17.yml +++ /dev/null @@ -1,80 +0,0 @@ - -name: tests marked as needspyarrow17 - -on: - pull_request: - branches: - - master - - devel - workflow_dispatch: - schedule: - - cron: '0 2 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -env: - - DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} - - # RUNTIME__SENTRY_DSN: https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752 - RUNTIME__LOG_LEVEL: ERROR - RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }} - - ACTIVE_DESTINATIONS: "[\"filesystem\"]" - ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"r2\", \"s3\", \"gs\", \"az\", \"abfss\", \"gdrive\"]" #excludes sftp - -jobs: - get_docs_changes: - name: docs changes - uses: ./.github/workflows/get_docs_changes.yml - if: ${{ !github.event.pull_request.head.repo.fork || contains(github.event.pull_request.labels.*.name, 'ci from fork')}} - - run_pyarrow17: - name: needspyarrow17 tests - needs: get_docs_changes - if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' - defaults: - run: - shell: bash - runs-on: "ubuntu-latest" - - steps: - - - name: Check out - uses: actions/checkout@master - - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: "3.10.x" - - - name: Install Poetry - uses: snok/install-poetry@v1.3.2 - with: - virtualenvs-create: true - virtualenvs-in-project: true - installer-parallel: true - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-pyarrow17 - - - name: Install dependencies - run: poetry install --no-interaction --with sentry-sdk --with pipeline -E deltalake -E duckdb -E filesystem -E gs -E s3 -E az - - - - name: Upgrade pyarrow - run: poetry run pip install pyarrow==17.0.0 - - - name: create secrets.toml - run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - - - name: Run needspyarrow17 tests Linux - run: | - poetry run pytest tests/libs -m "needspyarrow17" - poetry run pytest tests/load -m "needspyarrow17" diff --git 
a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py index 2ad175c8f5..8d890642ee 100644 --- a/tests/load/pipeline/test_filesystem_pipeline.py +++ b/tests/load/pipeline/test_filesystem_pipeline.py @@ -33,6 +33,7 @@ MEMORY_BUCKET, FILE_BUCKET, AZ_BUCKET, + SFTP_BUCKET, ) from tests.pipeline.utils import load_table_counts, assert_load_info, load_tables_to_dicts @@ -222,6 +223,9 @@ def some_source(): assert table.column("value").to_pylist() == [1, 2, 3, 4, 5] +@pytest.mark.skip( + reason="pyarrow version check not needed anymore, since we have 17 as a dependency" +) def test_delta_table_pyarrow_version_check() -> None: """Tests pyarrow version checking for `delta` table format. @@ -255,7 +259,7 @@ def foo(): destinations_configs( table_format_filesystem_configs=True, with_table_format="delta", - bucket_exclude=(MEMORY_BUCKET), + bucket_exclude=(MEMORY_BUCKET, SFTP_BUCKET), ), ids=lambda x: x.name, ) @@ -982,7 +986,7 @@ def parent_delta(): destinations_configs( table_format_filesystem_configs=True, with_table_format="delta", - bucket_subset=(FILE_BUCKET,), + bucket_subset=(FILE_BUCKET,), ), ids=lambda x: x.name, ) diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index 9dbb9cb9d3..9e799d4de6 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -40,6 +40,7 @@ DestinationTestConfiguration, FILE_BUCKET, AZ_BUCKET, + SFTP_BUCKET, ) diff --git a/tests/load/utils.py b/tests/load/utils.py index e7d6476a3a..3edf111a36 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -1004,7 +1004,7 @@ def prepare_load_package( def sequence_generator() -> Generator[List[Dict[str, str]], None, None]: count = 1 while True: - yield [{"content": str(count + i)} for i in range(2000)] + yield [{"content": str(count + i)} for i in range(3)] count += 3