Skip to content

Faster parquet streaming + filters with predicate pushdown #5140

Faster parquet streaming + filters with predicate pushdown

Faster parquet streaming + filters with predicate pushdown #5140

Workflow file for this run

name: CI
on:
pull_request:
branches:
- main
push:
branches:
- main
- ci-*
env:
HF_ALLOW_CODE_EVAL: 1
jobs:
check_code_quality:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[quality]
- name: Check quality
run: |
ruff check tests src benchmarks utils setup.py # linter
ruff format --check tests src benchmarks utils setup.py # formatter
test:
needs: check_code_quality
strategy:
matrix:
test: ['unit', 'integration']
os: [ubuntu-latest, windows-latest]
deps_versions: [deps-latest, deps-minimum]
continue-on-error: ${{ matrix.test == 'integration' }}
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python 3.9
uses: actions/setup-python@v5
with:
python-version: "3.9"
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Pin setuptools-scm
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo "installing pinned version of setuptools-scm to fix seqeval installation on 3.7" && pip install "setuptools-scm==6.4.2"
- name: Install uv
run: pip install --upgrade uv
- name: Install dependencies
run: uv pip install --system "datasets[tests] @ ."
- name: Install dependencies (latest versions)
if: ${{ matrix.deps_versions == 'deps-latest' }}
run: uv pip install --system --upgrade pyarrow huggingface-hub "dill<0.3.9"
- name: Install dependencies (minimum versions)
if: ${{ matrix.deps_versions != 'deps-latest' }}
run: uv pip install --system pyarrow==15.0.0 huggingface-hub==0.23.5 transformers dill==0.3.1.1
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
test_py311:
needs: check_code_quality
strategy:
matrix:
test: ['unit']
os: [ubuntu-latest, windows-latest]
deps_versions: [deps-latest]
continue-on-error: false
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Install uv
run: pip install --upgrade uv
- name: Install dependencies
run: uv pip install --system "datasets[tests] @ ."
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/
test_py311_numpy2:
needs: check_code_quality
strategy:
matrix:
test: ['unit']
os: [ubuntu-latest, windows-latest]
deps_versions: [deps-latest]
continue-on-error: false
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Install uv
run: pip install --upgrade uv
- name: Install dependencies
run: uv pip install --system "datasets[tests_numpy2] @ ."
- name: Test with pytest
run: |
python -m pytest -rfExX -m ${{ matrix.test }} -n 2 --dist loadfile -sv ./tests/