From 594880329f01785c997660e724dad74b9ed3a795 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sun, 30 Jan 2022 10:33:24 -0800 Subject: [PATCH 1/8] Implement other side of conversion --- datafusion/src/pyarrow.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/src/pyarrow.rs b/datafusion/src/pyarrow.rs index da05d63d8c2c..c7dc84d270db 100644 --- a/datafusion/src/pyarrow.rs +++ b/datafusion/src/pyarrow.rs @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -use pyo3::exceptions::{PyException, PyNotImplementedError}; +use pyo3::exceptions::PyException; use pyo3::prelude::*; use pyo3::types::PyList; -use pyo3::PyNativeType; use crate::arrow::array::ArrayData; use crate::arrow::pyarrow::PyArrowConvert; @@ -49,8 +48,13 @@ impl PyArrowConvert for ScalarValue { Ok(scalar) } - fn to_pyarrow(&self, _py: Python) -> PyResult { - Err(PyNotImplementedError::new_err("Not implemented")) + fn to_pyarrow(&self, py: Python) -> PyResult { + let array = self.to_array(); + // convert to pyarrow array using C data interface + let pyarray = array.data_ref().clone().into_py(py); + let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?; + + Ok(pyscalar) } } From bab0dbcef436abc36ef6274098bc69dec26078bf Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sun, 30 Jan 2022 10:46:20 -0800 Subject: [PATCH 2/8] Add test workflow --- .github/workflows/rust.yml | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4b633d4bc9e5..31cf95d7d209 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -230,6 +230,48 @@ jobs: # do not produce debug symbols to keep memory usage down RUSTFLAGS: "-C debuginfo=0" + test-datafusion-pyarrow: + needs: [linux-build-lib] + runs-on: ubuntu-latest + strategy: + matrix: + arch: [amd64] + rust: [stable] + container: + image: ${{ matrix.arch }}/rust + env: + # Disable full debug symbol generation to speed up CI build and keep memory down + # "1" means line tables only, which is useful for panic tracebacks. + RUSTFLAGS: "-C debuginfo=1" + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Cache Cargo + uses: actions/cache@v2 + with: + path: /github/home/.cargo + # this key equals the ones on `linux-build-lib` for re-use + key: cargo-cache- + - name: Cache Rust dependencies + uses: actions/cache@v2 + with: + path: /github/home/target + # this key equals the ones on `linux-build-lib` for re-use + key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} + - name: Setup Rust toolchain + run: | + rustup toolchain install ${{ matrix.rust }} + rustup default ${{ matrix.rust }} + rustup component add rustfmt + - name: Run tests + run: | + cd datafusion + cargo test --features=pyarrow + env: + CARGO_HOME: "/github/home/.cargo" + CARGO_TARGET_DIR: "/github/home/target" + lint: name: Lint runs-on: ubuntu-latest From 4bd8e7e514e9d24f4530314c0bc9908e7dbaf4ed Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sun, 30 Jan 2022 13:00:57 -0800 Subject: [PATCH 3/8] Add (failing) tests --- datafusion/src/pyarrow.rs | 75 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/datafusion/src/pyarrow.rs b/datafusion/src/pyarrow.rs index c7dc84d270db..de95375096fd 100644 --- a/datafusion/src/pyarrow.rs +++ b/datafusion/src/pyarrow.rs @@ -69,3 +69,78 @@ impl<'a> IntoPy for ScalarValue { self.to_pyarrow(py).unwrap() } } + +#[cfg(test)] +mod tests { + use super::*; + use pyo3::prepare_freethreaded_python; + use pyo3::py_run; + use pyo3::types::PyDict; + use pyo3::Python; + + fn init_python() { + prepare_freethreaded_python(); + Python::with_gil(|py| { + if let Err(err) = py.run("import pyarrow", None, None) { + let locals = PyDict::new(py); + py.run( + "import sys; executable = sys.executable; python_path = sys.path", + None, + Some(locals), + ) + .expect("Couldn't get python info"); + let executable: String = + locals.get_item("executable").unwrap().extract().unwrap(); + let python_path: Vec<&str> = + locals.get_item("python_path").unwrap().extract().unwrap(); + + Err(err).expect( + format!( + "pyarrow not found\nExecutable: {}\nPython path: {:?}\n\ + HINT: try `pip install pyarrow` + HINT: if wrong Python path, try `PYO3_PYTHON=$(which python)`", + executable, python_path + ) + .as_ref(), + ) + } + }) + } + + #[test] + fn test_roundtrip() { + init_python(); + + let example_scalars = vec![ + ScalarValue::Boolean(Some(true)), + ScalarValue::Int32(Some(23)), + ScalarValue::Float64(Some(12.34)), + ScalarValue::Utf8(Some("Hello!".to_string())), + ScalarValue::Date64(Some(2014)), + ]; + + Python::with_gil(|py| { + for scalar in example_scalars.iter() { + let result = + ScalarValue::from_pyarrow(scalar.to_pyarrow(py).unwrap().as_ref(py)) + .unwrap(); + assert_eq!(scalar, &result); + } + }); + } + + #[test] + fn test_py_scalar() { + init_python(); + + Python::with_gil(|py| { + let scalar_float = ScalarValue::Float64(Some(12.34)); + let py_float = scalar_float.into_py(py).call_method0(py, "as_py").unwrap(); + py_run!(py, py_float, "assert py_float == 12.34"); + + let scalar_string = ScalarValue::Utf8(Some("Hello!".to_string())); + let py_string = scalar_string.into_py(py).call_method0(py, "as_py").unwrap(); + py_run!(py, py_string, "assert py_string == 'Hello!'"); + }); + } +} From 8db4147a8b3cfb92c56d0a1559f2930321d6246b Mon Sep 17 00:00:00 2001 From: Will Jones Date: Sun, 30 Jan 2022 15:23:51 -0800 Subject: [PATCH 4/8] Get unit tests passing --- .github/workflows/rust.yml | 6 ++++++ datafusion/src/pyarrow.rs | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 31cf95d7d209..c1b8c0275ef4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -259,6 +259,12 @@ jobs: path: /github/home/target # this key equals the ones on `linux-build-lib` for re-use key: ${{ runner.os }}-${{ matrix.arch }}-target-cache-${{ matrix.rust }} + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + - name: Install PyArrow + run: | + pip install pyarrow - name: Setup Rust toolchain run: | rustup toolchain install ${{ matrix.rust }} diff --git a/datafusion/src/pyarrow.rs b/datafusion/src/pyarrow.rs index de95375096fd..342fc4349b03 100644 --- a/datafusion/src/pyarrow.rs +++ b/datafusion/src/pyarrow.rs @@ -116,7 +116,7 @@ mod tests { ScalarValue::Int32(Some(23)), ScalarValue::Float64(Some(12.34)), ScalarValue::Utf8(Some("Hello!".to_string())), - ScalarValue::Date64(Some(2014)), + ScalarValue::Date32(Some(1234)), ]; Python::with_gil(|py| { From c8ddfa63990e611b4b0a57e18b00e89011a72362 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 31 Jan 2022 07:37:01 -0800 Subject: [PATCH 5/8] Use python -m pip --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index c1b8c0275ef4..57e598b7e3e2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -264,7 +264,7 @@ jobs: python-version: "3.8" - name: Install PyArrow run: | - pip install pyarrow + python -m pip install pyarrow - name: Setup Rust toolchain run: | rustup toolchain install ${{ matrix.rust }} From 4c8943638253e9d295c500e5640e4cbe485e721b Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 31 Jan 2022 07:58:12 -0800 Subject: [PATCH 6/8] Debug LD_LIBRARY_PATH --- .github/workflows/rust.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 57e598b7e3e2..11a807b8b5ae 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -265,6 +265,9 @@ jobs: - name: Install PyArrow run: | python -m pip install pyarrow + - name: Debug + run: | + ls $LD_LIBRARY_PATH - name: Setup Rust toolchain run: | rustup toolchain install ${{ matrix.rust }} From eeabd8ed11e068b60baaf275b9ee7b9a4c9c2c16 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 31 Jan 2022 08:14:23 -0800 Subject: [PATCH 7/8] Set LIBRARY_PATH --- .github/workflows/rust.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 11a807b8b5ae..d466d67efa6f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -264,10 +264,8 @@ jobs: python-version: "3.8" - name: Install PyArrow run: | + echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> $GITHUB_ENV python -m pip install pyarrow - - name: Debug - run: | - ls $LD_LIBRARY_PATH - name: Setup Rust toolchain run: | rustup toolchain install ${{ matrix.rust }} From 78ac9ff0f032b7911d733bee727faf68e5feae7f Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 2 Feb 2022 21:26:25 -0800 Subject: [PATCH 8/8] Update help with better info --- datafusion/src/pyarrow.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datafusion/src/pyarrow.rs b/datafusion/src/pyarrow.rs index 342fc4349b03..62a979233f02 100644 --- a/datafusion/src/pyarrow.rs +++ b/datafusion/src/pyarrow.rs @@ -97,8 +97,11 @@ mod tests { Err(err).expect( format!( "pyarrow not found\nExecutable: {}\nPython path: {:?}\n\ - HINT: try `pip install pyarrow` - HINT: if wrong Python path, try `PYO3_PYTHON=$(which python)`", + HINT: try `pip install pyarrow`\n\ + NOTE: On Mac OS, you must compile against a Framework Python \ + (default in python.org installers and brew, but not pyenv)\n\ + NOTE: On Mac OS, PYO3 might point to incorrect path. Try \ + `export PYTHONPATH=$(python -c \"import sys; print(sys.path[-1])\")`\n", executable, python_path ) .as_ref(),