From 6f3bd3aa84146fadbec17507f4e7de4dea291465 Mon Sep 17 00:00:00 2001 From: Marcin Krystianc Date: Thu, 9 Jan 2025 09:58:20 +0100 Subject: [PATCH] Arrow ~=18.1.0 (#76) --- .github/workflows/python.yml | 4 +++- python/pyproject.toml | 7 ++++--- python/requirements.txt | 4 +--- python/test/test_palletjack.py | 4 ++++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index bcda68a..7669331 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -136,7 +136,7 @@ jobs: run: python -m cibuildwheel --output-dir dist # to supply options, put them in 'env', like: env: - CIBW_REPAIR_WHEEL_COMMAND_LINUX: auditwheel repair --exclude libarrow.so.1700 --exclude libparquet.so.1700 -w {dest_dir} {wheel} + CIBW_REPAIR_WHEEL_COMMAND_LINUX: auditwheel repair --exclude libarrow.so.1801 --exclude libparquet.so.1801 -w {dest_dir} {wheel} CIBW_ENVIRONMENT: VCPKG_TARGET_TRIPLET="${{ steps.vcpkg-info.outputs.triplet }}" CIBW_BUILD_VERBOSITY: 1 # We use manylinux_2_28 for ABI compatibility with pyarrow @@ -181,6 +181,7 @@ jobs: - name: Test with pytest run: | + pip install -r python/requirements.txt # Keep in mind that if the local and remote versions are the same, the remote version will be installed pip install PalletJack --pre --find-links ./dist --break-system-packages --only-binary=:all: # So now ensure that the local version is installed @@ -230,6 +231,7 @@ jobs: - name: Run benchmarks run: | + pip install -r python/requirements.txt # Keep in mind that if the local and remote versions are the same, the remote version will be installed pip install PalletJack --pre --find-links ./dist --break-system-packages --only-binary=:all: # So now ensure that the local version is installed diff --git a/python/pyproject.toml b/python/pyproject.toml index 2b2f11b..6c41e89 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -2,7 +2,8 @@ requires = [ "setuptools>=55.0", "Cython>=3", - "pyarrow~=17.0", + "numpy>=1.16.6", + "pyarrow~=18.1.0", "thrift", ] @@ -10,7 +11,7 @@ build-backend = "setuptools.build_meta" [project] name = "palletjack" -version = "2.3.1" +version = "2.4.0" description = "Faster parquet metadata reading" readme = "README.md" requires-python = ">=3.9" @@ -20,7 +21,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "pyarrow~=17.0", + "pyarrow~=18.1.0", ] [tool.setuptools.packages.find] diff --git a/python/requirements.txt b/python/requirements.txt index 382afb7..24ce15a 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,3 +1 @@ -setuptools>=55.0 -Cython>=3 -pyarrow~=16.0 +numpy diff --git a/python/test/test_palletjack.py b/python/test/test_palletjack.py index 7ea1c04..be42c32 100644 --- a/python/test/test_palletjack.py +++ b/python/test/test_palletjack.py @@ -33,6 +33,10 @@ class TestPalletJack(unittest.TestCase): def test_read_metadata_columns_rows(self): def validate_reading(parquet_path, index_path, row_groups, column_indices): + + # Passing an empty list to the read_row_groups method is an invalid operation since Arrow 18.0. + if (len(row_groups) == 0): return + # Reading using the original metadata pr = pq.ParquetReader() pr.open(parquet_path)