-
Notifications
You must be signed in to change notification settings - Fork 875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Split out arrow-schema
(#2594)
#2711
Changes from all commits
a2f6a72
da189d1
f2ff606
e865100
3f213db
8c68812
b01cb8e
af2652b
56eb7a6
a94fda1
8efd726
02d0e38
4e1896c
6dfabe1
b614881
bdaa1cf
d963c54
3c3faf2
6f62bb6
4ec95e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
[workspace] | ||
members = [ | ||
"arrow", | ||
"arrow-schema", | ||
"arrow-buffer", | ||
"arrow-flight", | ||
"parquet", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,9 +28,13 @@ use arrow::compute::kernels; | |
use arrow::datatypes::{DataType, Field, Schema}; | ||
use arrow::error::ArrowError; | ||
use arrow::ffi_stream::ArrowArrayStreamReader; | ||
use arrow::pyarrow::PyArrowConvert; | ||
use arrow::pyarrow::{PyArrowConvert, PyArrowException, PyArrowType}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The pyarrow bindings take a bit of a hit from this split, but I don't really see an obvious way around this, unless we push pyo3 into arrow-schema also. Thoughts? Edit: This doesn't actually work, because the conversions for Schema require the FFI bindings, so I don't think there is a way around this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. cc @kszucs / @andygrove I don't use the Python bindings, so I don't understand the implications of this change. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will look into this. These are important in DataFusion/Ballista for executing Python UDFs. I will have time to review tomorrow. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I ran out of time today - the Ballista release work took longer than hoped. I will try and look at this over the weekend. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we need to wait until apache/datafusion#3483 is passing, and then we can create a PR in https://github.com/apache/arrow-datafusion-python/ to use that version and make sure the tests pass. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. apache/datafusion#3483 is now ready for review / merge. There was a problem hiding this comment. 
Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updates in apache/datafusion-python#54. |
||
use arrow::record_batch::RecordBatch; | ||
|
||
fn to_py_err(err: ArrowError) -> PyErr { | ||
PyArrowException::new_err(err.to_string()) | ||
} | ||
|
||
/// Returns `array + array` of an int64 array. | ||
#[pyfunction] | ||
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> { | ||
|
@@ -41,8 +45,10 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> { | |
let array = array | ||
.as_any() | ||
.downcast_ref::<Int64Array>() | ||
.ok_or(ArrowError::ParseError("Expects an int64".to_string()))?; | ||
let array = kernels::arithmetic::add(array, array)?; | ||
.ok_or_else(|| ArrowError::ParseError("Expects an int64".to_string())) | ||
.map_err(to_py_err)?; | ||
|
||
let array = kernels::arithmetic::add(array, array).map_err(to_py_err)?; | ||
|
||
// export | ||
array.to_pyarrow(py) | ||
|
@@ -66,56 +72,61 @@ fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> { | |
|
||
/// Returns the substring | ||
#[pyfunction] | ||
fn substring(array: ArrayData, start: i64) -> PyResult<ArrayData> { | ||
fn substring( | ||
array: PyArrowType<ArrayData>, | ||
start: i64, | ||
) -> PyResult<PyArrowType<ArrayData>> { | ||
// import | ||
let array = ArrayRef::from(array); | ||
let array = ArrayRef::from(array.0); | ||
|
||
// substring | ||
let array = kernels::substring::substring(array.as_ref(), start, None)?; | ||
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?; | ||
|
||
Ok(array.data().to_owned()) | ||
Ok(array.data().to_owned().into()) | ||
} | ||
|
||
/// Returns the concatenate | ||
#[pyfunction] | ||
fn concatenate(array: ArrayData, py: Python) -> PyResult<PyObject> { | ||
let array = ArrayRef::from(array); | ||
fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject> { | ||
let array = ArrayRef::from(array.0); | ||
|
||
// concat | ||
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()])?; | ||
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?; | ||
|
||
array.to_pyarrow(py) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_type(obj: DataType) -> PyResult<DataType> { | ||
fn round_trip_type(obj: PyArrowType<DataType>) -> PyResult<PyArrowType<DataType>> { | ||
Ok(obj) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_field(obj: Field) -> PyResult<Field> { | ||
fn round_trip_field(obj: PyArrowType<Field>) -> PyResult<PyArrowType<Field>> { | ||
Ok(obj) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_schema(obj: Schema) -> PyResult<Schema> { | ||
fn round_trip_schema(obj: PyArrowType<Schema>) -> PyResult<PyArrowType<Schema>> { | ||
Ok(obj) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_array(obj: ArrayData) -> PyResult<ArrayData> { | ||
fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayData>> { | ||
Ok(obj) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_record_batch(obj: RecordBatch) -> PyResult<RecordBatch> { | ||
fn round_trip_record_batch( | ||
obj: PyArrowType<RecordBatch>, | ||
) -> PyResult<PyArrowType<RecordBatch>> { | ||
Ok(obj) | ||
} | ||
|
||
#[pyfunction] | ||
fn round_trip_record_batch_reader( | ||
obj: ArrowArrayStreamReader, | ||
) -> PyResult<ArrowArrayStreamReader> { | ||
obj: PyArrowType<ArrowArrayStreamReader>, | ||
) -> PyResult<PyArrowType<ArrowArrayStreamReader>> { | ||
Ok(obj) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
[package] | ||
name = "arrow-schema" | ||
version = "23.0.0" | ||
description = "Defines the logical types for arrow arrays" | ||
homepage = "https://github.com/apache/arrow-rs" | ||
repository = "https://github.com/apache/arrow-rs" | ||
authors = ["Apache Arrow <dev@arrow.apache.org>"] | ||
license = "Apache-2.0" | ||
keywords = ["arrow"] | ||
include = [ | ||
"benches/*.rs", | ||
"src/**/*.rs", | ||
"Cargo.toml", | ||
] | ||
edition = "2021" | ||
rust-version = "1.62" | ||
|
||
[lib] | ||
name = "arrow_schema" | ||
path = "src/lib.rs" | ||
bench = false | ||
|
||
[dependencies] | ||
serde = { version = "1.0", default-features = false, features = ["derive", "std"], optional = true } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is certainly a nice (very small) list of dependencies! |
||
|
||
[features] | ||
default = [] | ||
|
||
[dev-dependencies] | ||
serde_json = "1.0" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we do the same for
arrow-buffer
as added in #2693?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can, but it doesn't have any feature flags that explicitly need testing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we could add a comment for future readers like myself.