Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added FFI integration with remaining types
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jul 22, 2021
1 parent 8d7d08f commit 2ff26d8
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 11 deletions.
14 changes: 14 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# under the License.

import unittest
import decimal

import pyarrow
import arrow_pyarrow_integration_testing
Expand Down Expand Up @@ -90,6 +91,19 @@ def test_string_roundtrip(self):
c = pyarrow.array(["a", None, "ccc"])
self.assertEqual(b, c)

def test_decimal_roundtrip(self):
"""
Python -> Rust -> Python
"""
data = [
round(decimal.Decimal(722.82), 2),
round(decimal.Decimal(-934.11), 2),
None
]
a = pyarrow.array(data, pyarrow.decimal128(5, 2))
b = arrow_pyarrow_integration_testing.round_trip(a)
self.assertEqual(a, b)

def test_string_python(self):
"""
Python -> Rust -> Python
Expand Down
4 changes: 4 additions & 0 deletions src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,8 @@ unsafe impl ToFfi for FixedSizeListArray {
fn offset(&self) -> usize {
self.offset
}

fn children(&self) -> Vec<Arc<dyn Array>> {
vec![self.values().clone()]
}
}
10 changes: 10 additions & 0 deletions src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
mod tests {
use super::*;
use crate::array::*;
use crate::datatypes::TimeUnit;
use crate::{error::Result, ffi};
use std::sync::Arc;

Expand Down Expand Up @@ -146,6 +147,15 @@ mod tests {
test_round_trip(data)
}

#[test]
fn test_timestamp_tz() -> Result<()> {
let data = Int64Array::from(&vec![Some(2), None, None]).to(DataType::Timestamp(
TimeUnit::Second,
Some("UTC".to_string()),
));
test_round_trip(data)
}

#[test]
fn test_large_binary() -> Result<()> {
let data =
Expand Down
79 changes: 68 additions & 11 deletions src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use crate::{
bytes::{Bytes, Deallocation},
Buffer,
},
datatypes::{DataType, Field, TimeUnit},
datatypes::{DataType, Field, IntervalUnit, TimeUnit},
error::{ArrowError, Result},
types::NativeType,
};
Expand Down Expand Up @@ -200,6 +200,12 @@ fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
"ttm" => DataType::Time32(TimeUnit::Millisecond),
"ttu" => DataType::Time64(TimeUnit::Microsecond),
"ttn" => DataType::Time64(TimeUnit::Nanosecond),
"tDs" => DataType::Duration(TimeUnit::Second),
"tDm" => DataType::Duration(TimeUnit::Millisecond),
"tDu" => DataType::Duration(TimeUnit::Microsecond),
"tDn" => DataType::Duration(TimeUnit::Nanosecond),
"tiM" => DataType::Interval(IntervalUnit::YearMonth),
"tiD" => DataType::Interval(IntervalUnit::DayTime),
"+l" => {
let child = schema.child(0);
DataType::List(Box::new(to_field(child)?))
Expand All @@ -215,10 +221,43 @@ fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
DataType::Struct(children)
}
other => {
return Err(ArrowError::Ffi(format!(
"The datatype \"{}\" is still not supported in Rust implementation",
other
)))
let parts = other.split(':').collect::<Vec<_>>();
if parts.len() == 2 && parts[0] == "tss" {
DataType::Timestamp(TimeUnit::Second, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsm" {
DataType::Timestamp(TimeUnit::Millisecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsu" {
DataType::Timestamp(TimeUnit::Microsecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "tsn" {
DataType::Timestamp(TimeUnit::Nanosecond, Some(parts[1].to_string()))
} else if parts.len() == 2 && parts[0] == "d" {
let parts = parts[1].split(',').collect::<Vec<_>>();
if parts.len() < 2 || parts.len() > 3 {
return Err(ArrowError::Ffi(
"Decimal must contain 2 or 3 comma-separated values".to_string(),
));
};
if parts.len() == 3 {
let bit_width = parts[0].parse::<usize>().map_err(|_| {
ArrowError::Ffi("Decimal bit width is not a valid integer".to_string())
})?;
if bit_width != 128 {
return Err(ArrowError::Ffi("Decimal256 is not supported".to_string()));
}
}
let precision = parts[0].parse::<usize>().map_err(|_| {
ArrowError::Ffi("Decimal precision is not a valid integer".to_string())
})?;
let scale = parts[1].parse::<usize>().map_err(|_| {
ArrowError::Ffi("Decimal scale is not a valid integer".to_string())
})?;
DataType::Decimal(precision, scale)
} else {
return Err(ArrowError::Ffi(format!(
"The datatype \"{}\" is still not supported in Rust implementation",
other
)));
}
}
};
Ok(Field::new(schema.name(), data_type, schema.nullable()))
Expand Down Expand Up @@ -250,15 +289,33 @@ fn to_format(data_type: &DataType) -> Result<String> {
DataType::Time32(TimeUnit::Millisecond) => "ttm",
DataType::Time64(TimeUnit::Microsecond) => "ttu",
DataType::Time64(TimeUnit::Nanosecond) => "ttn",
DataType::Duration(TimeUnit::Second) => "tDs",
DataType::Duration(TimeUnit::Millisecond) => "tDm",
DataType::Duration(TimeUnit::Microsecond) => "tDu",
DataType::Duration(TimeUnit::Nanosecond) => "tDn",
DataType::Interval(IntervalUnit::YearMonth) => "tiM",
DataType::Interval(IntervalUnit::DayTime) => "tiD",
DataType::Timestamp(unit, tz) => {
let unit = match unit {
TimeUnit::Second => "s",
TimeUnit::Millisecond => "m",
TimeUnit::Microsecond => "u",
TimeUnit::Nanosecond => "n",
};
return Ok(format!(
"ts{}:{}",
unit,
tz.as_ref().map(|x| x.as_ref()).unwrap_or("")
));
}
DataType::Decimal(precision, scale) => return Ok(format!("d:{},{}", precision, scale)),
DataType::List(_) => "+l",
DataType::LargeList(_) => "+L",
DataType::Struct(_) => "+s",
z => {
return Err(ArrowError::Ffi(format!(
"The datatype \"{:?}\" is still not supported in Rust implementation",
z
)))
}
DataType::FixedSizeBinary(size) => return Ok(format!("w{}", size)),
DataType::FixedSizeList(_, size) => return Ok(format!("+w:{}", size)),
DataType::Union(_) => todo!(),
_ => todo!(),
}
.to_string())
}
Expand Down

0 comments on commit 2ff26d8

Please sign in to comment.