diff --git a/.github/workflows/test_win.yml b/.github/workflows/test_win.yml index d12d0e12..7e5217f4 100644 --- a/.github/workflows/test_win.yml +++ b/.github/workflows/test_win.yml @@ -34,6 +34,6 @@ jobs: - name: Install dependencies run: pip install -r requirements.txt - name: Install pyarrow - run: pip install pyarrow==7.0.0 + run: pip install pyarrow==8.0.0 - name: Test run: python -m pytest --reruns 2 --only-rerun 'Overflow in cast' tests/fugue tests/fugue_dask tests/fugue_ibis tests/fugue_duckdb diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..61dc1129 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,17 @@ +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-20.04 + tools: + python: "3.10" + jobs: + pre_install: + - pip install -U pip + +sphinx: + configuration: docs/conf.py + +python: + install: + - requirements: requirements.txt diff --git a/fugue_ibis/execution_engine.py b/fugue_ibis/execution_engine.py index 1c8b3c4c..1f2d0846 100644 --- a/fugue_ibis/execution_engine.py +++ b/fugue_ibis/execution_engine.py @@ -92,11 +92,11 @@ def join( _df2 = self.to_df(df2) key_schema, end_schema = get_join_schemas(_df1, _df2, how=how, on=on) on_fields = [_df1.native[k] == _df2.native[k] for k in key_schema] - if ibis.__version__ < "6": + if ibis.__version__ < "6": # pragma: no cover suffixes: Dict[str, Any] = dict(suffixes=("", _JOIN_RIGHT_SUFFIX)) - else: # pragma: no cover + else: # breaking change in ibis 6.0 - suffixes = dict(lname="", rname=_JOIN_RIGHT_SUFFIX) + suffixes = dict(lname="", rname="{name}" + _JOIN_RIGHT_SUFFIX) if how.lower() == "cross": tb = _df1.native.cross_join(_df2.native, **suffixes) elif how.lower() == "right_outer": diff --git a/setup.py b/setup.py index 8b2e2dff..baf03d8f 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ def get_version() -> str: keywords="distributed spark dask ray sql dsl domain specific language", url="http://github.com/fugue-project/fugue", install_requires=[ - "triad>=0.9.3", + "triad>=0.9.4", "adagio>=0.2.4", ], extras_require={ @@ -50,14 +50,19 @@ def get_version() -> str: "pyarrow>=7.0.0", "pandas>=2.0.2", ], - "ray": ["ray[data]>=2.4.0", "duckdb>=0.5.0", "pyarrow>=6.0.1"], + "ray": [ + "ray[data]>=2.4.0", + "duckdb>=0.5.0", + "pyarrow>=6.0.1", + "pandas<2.2", + ], "duckdb": SQL_DEPENDENCIES + [ "duckdb>=0.5.0", "numpy", ], "polars": ["polars"], - "ibis": SQL_DEPENDENCIES + ["ibis-framework>=3.2.0,<6"], + "ibis": SQL_DEPENDENCIES + ["ibis-framework"], "notebook": ["notebook", "jupyterlab", "ipython>=7.10.0"], "all": SQL_DEPENDENCIES + [ @@ -70,8 +75,8 @@ def get_version() -> str: "ipython>=7.10.0", "duckdb>=0.5.0", "pyarrow>=6.0.1", - "pandas>=2.0.2", - "ibis-framework>=3.2.0,<6", + "pandas>=2.0.2,<2.2", # because of Ray + "ibis-framework", "polars", ], }, diff --git a/tests/fugue_duckdb/test_execution_engine.py b/tests/fugue_duckdb/test_execution_engine.py index 9b3f69ca..b41416ab 100644 --- a/tests/fugue_duckdb/test_execution_engine.py +++ b/tests/fugue_duckdb/test_execution_engine.py @@ -3,7 +3,6 @@ import duckdb import pandas as pd import pyarrow as pa -import pytest from pytest import raises import fugue.api as fa @@ -11,7 +10,6 @@ from fugue import ArrowDataFrame, DataFrame, FugueWorkflow, fsql from fugue.api import engine_context from fugue.plugins import infer_execution_engine -from fugue_duckdb import DuckExecutionEngine from fugue_duckdb.dataframe import DuckDataFrame from fugue_test.builtin_suite import BuiltInTests from fugue_test.execution_suite import ExecutionEngineTests @@ -109,39 +107,55 @@ def test_builtin_connection(): def test_configs(): - dag = FugueWorkflow() - df = dag.df([[None], [1]], "a:double") - df = dag.select("SELECT * FROM ", df, "ORDER BY a LIMIT 1") - df.assert_eq(dag.df([[None]], "a:double")) - - dag.run( - "duckdb", - { - "fugue.duckdb.pragma.threads": 2, - "fugue.duckdb.pragma.default_null_order": "NULLS FIRST", - }, + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads": 1}, + ) ) + assert df.value.iloc[0] == "1" - dag = FugueWorkflow() - df = dag.df([[None], [1]], "a:double") - df = dag.select("SELECT * FROM ", df, "ORDER BY a LIMIT 1") - df.assert_eq(dag.df([[1]], "a:double")) - - dag.run( - "duckdb", - { - "fugue.duckdb.pragma.threads": 2, - "fugue.duckdb.pragma.default_null_order": "NULLS LAST", - }, + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads": 3}, + ) ) + assert df.value.iloc[0] == "3" with raises(ValueError): # invalid config format - dag.run("duckdb", {"fugue.duckdb.pragma.threads;xx": 2}) + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads;xx": 3}, + ) + ) with raises(Exception): # non-existent config - dag.run("duckdb", {"fugue.duckdb.pragma.threads_xx": 2}) + df = fa.as_pandas( + fa.fugue_sql( + """ + SELECT name, value FROM duckdb_settings() + WHERE name IN ('threads') + """, + engine="duckdb", + engine_conf={"fugue.duckdb.pragma.threads_xx": 3}, + ) + ) def test_annotations():