Skip to content

Commit

Permalink
CI Fixes (#162)
Browse files Browse the repository at this point in the history
* Set RUST_BACKTRACE 1

* Add protoc action to build.yml for manylinux

* Add protoc action to test.yaml matrix

* Bump to 17.0.0 for a test

* include features

* test re-enabling protoc action

* enable protoc action in test.yaml

* black formatting

* python linter fixes

* black fixes

* update run tests action

* remove duplicate maturin develop

* Include Pip install for datafusion dist

* Remove some pip install options that are not needed

* Install dist/datafusion*.whl

* Use virtualenv python version for pip install

* Use virtualenv python version for pytest run

* examine the contents of the resulting .whl file to make sure _internal.abi3.so is present

* Wrong file extension ... change to .whl from .zip

* Try another flavor of pip installing

* Fix pyarrow version

* update pip install process

* testing

* doh

* Remove previous maturin build since now it happens in the test run section
  • Loading branch information
jdye64 authored Feb 2, 2023
1 parent 8975d4e commit 8181c84
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 86 deletions.
39 changes: 0 additions & 39 deletions .github/actions/setup-builder/action.yaml

This file was deleted.

6 changes: 6 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,15 @@ jobs:
name: python-wheel-license
path: .
- run: cat LICENSE.txt
- name: Install Protoc
uses: arduino/setup-protoc@v1
with:
version: '3.x'
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
env:
RUST_BACKTRACE: 1
rust-toolchain: nightly
target: x86_64
manylinux: auto
Expand Down
37 changes: 9 additions & 28 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,10 @@ jobs:
toolchain: ${{ matrix.toolchain }}
override: true

- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
export PROTOC=$HOME/d/protoc/bin
sudo chown -R $(whoami) $HOME/d/protoc
protoc --version
- name: Install Protoc
uses: arduino/setup-protoc@v1
with:
version: '3.x'

- name: Setup Python
uses: actions/setup-python@v4
Expand Down Expand Up @@ -112,22 +104,11 @@ jobs:
flake8 --exclude venv --ignore=E501,W503
black --line-length 79 --diff --check .
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
command: build
args: --release --out dist

- name: Run tests
env:
RUST_BACKTRACE: 1
run: |
git submodule update --init
export PATH=$PATH:$HOME/d/protoc/bin
export PROTOC=$HOME/d/protoc/bin
sudo chown -R $(whoami) $HOME/d/protoc
ls -l $HOME/d/protoc/
ls -l $HOME/d/protoc/bin
pip install datafusion-python --no-index --find-links dist --force-reinstall
pip install pytest
cargo clean
maturin develop
RUST_BACKTRACE=1 pytest -v .
source venv/bin/activate
pip install -e . -vv
pytest -v .
31 changes: 18 additions & 13 deletions datafusion/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ def struct_df():

return ctx.create_dataframe([[batch]])


@pytest.fixture
def aggregate_df():
ctx = SessionContext()
ctx.register_csv('test', 'testing/data/csv/aggregate_test_100.csv')
return ctx.sql('select c1, sum(c2) from test group by c1')
ctx.register_csv("test", "testing/data/csv/aggregate_test_100.csv")
return ctx.sql("select c1, sum(c2) from test group by c1")


def test_select(df):
Expand Down Expand Up @@ -271,10 +272,11 @@ def test_logical_plan(aggregate_df):

assert expected == plan.display()

expected = \
"Projection: test.c1, SUM(test.c2)\n" \
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \
expected = (
"Projection: test.c1, SUM(test.c2)\n"
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n"
" TableScan: test"
)

assert expected == plan.display_indent()

Expand All @@ -286,25 +288,29 @@ def test_optimized_logical_plan(aggregate_df):

assert expected == plan.display()

expected = \
"Projection: test.c1, SUM(test.c2)\n" \
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \
expected = (
"Projection: test.c1, SUM(test.c2)\n"
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n"
" TableScan: test projection=[c1, c2]"
)

assert expected == plan.display_indent()


def test_execution_plan(aggregate_df):
plan = aggregate_df.execution_plan()

expected = "ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n"
expected = (
"ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n"
)

assert expected == plan.display()

expected = \
"ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n" \
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n" \
expected = (
"ProjectionExec: expr=[c1@0 as c1, SUM(test.c2)@1 as SUM(test.c2)]\n"
" Aggregate: groupBy=[[test.c1]], aggr=[[SUM(test.c2)]]\n"
" TableScan: test projection=[c1, c2]"
)

indent = plan.display_indent()

Expand All @@ -317,7 +323,6 @@ def test_execution_plan(aggregate_df):
assert "CsvExec:" in indent



def test_repartition(df):
df.repartition(2)

Expand Down
15 changes: 10 additions & 5 deletions datafusion/tests/test_substrait.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@
# under the License.

import pyarrow as pa
import pyarrow.dataset as ds

from datafusion import column, literal, SessionContext
from datafusion import SessionContext
from datafusion import substrait as ss
import pytest

Expand All @@ -39,8 +38,14 @@ def test_substrait_serialization(ctx):
assert ctx.tables() == {"t"}

# For now just make sure the method calls don't blow up
substrait_plan = ss.substrait.serde.serialize_to_plan("SELECT * FROM t", ctx)
substrait_bytes = ss.substrait.serde.serialize_bytes("SELECT * FROM t", ctx)
substrait_plan = ss.substrait.serde.serialize_to_plan(
"SELECT * FROM t", ctx
)
substrait_bytes = ss.substrait.serde.serialize_bytes(
"SELECT * FROM t", ctx
)
substrait_plan = ss.substrait.serde.deserialize_bytes(substrait_bytes)
df_logical_plan = ss.substrait.consumer.from_substrait_plan(ctx, substrait_plan)
df_logical_plan = ss.substrait.consumer.from_substrait_plan(
ctx, substrait_plan
)
substrait_plan = ss.substrait.producer.to_substrait_plan(df_logical_plan)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ classifier = [
"Programming Language :: Rust",
]
dependencies = [
"pyarrow>=1",
"pyarrow>=6.0.1",
]

[project.urls]
Expand Down

0 comments on commit 8181c84

Please sign in to comment.