Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added Utf8 and Binary
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 19, 2022
1 parent 76fbc46 commit 606afd3
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 7 deletions.
36 changes: 35 additions & 1 deletion arrow-odbc-integration-testing/src/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use arrow2::array::{Array, BinaryArray, BooleanArray, Int32Array, Utf8Array};
use arrow2::chunk::Chunk;
use arrow2::datatypes::{DataType, Field};
use arrow2::error::Result;
use arrow2::io::odbc::api::{Connection, Cursor};
use arrow2::io::odbc::write::{buffer_from_description, infer_descriptions, serialize};

use super::read::read;
Expand Down Expand Up @@ -97,3 +96,38 @@ fn bool_nullable() -> Result<()> {
&table_name,
)
}

/// Round-trips a nullable `Utf8Array<i32>` column through a `VARCHAR(4)` table.
#[test]
fn utf8() -> Result<()> {
    // The table is named after the last `:`-separated segment of this
    // function's path, prefixed with `write_`.
    let (_, short_name) = function_name!().rsplit_once(':').unwrap();
    let table_name = format!("write_{}", short_name);

    // One null slot between a short value and one that fills all 4 characters.
    let column = Utf8Array::<i32>::from([Some("aa"), None, Some("aaaa")]);
    let expected = Chunk::new(vec![Box::new(column) as _]);
    let fields = vec![Field::new("a", DataType::Utf8, true)];

    test(expected, fields, "VARCHAR(4)", &table_name)
}

/// Round-trips a nullable `BinaryArray<i32>` column through a `VARBINARY(4)` table.
#[test]
fn binary() -> Result<()> {
    // The table is named after the last `:`-separated segment of this
    // function's path, prefixed with `write_`.
    let (_, short_name) = function_name!().rsplit_once(':').unwrap();
    let table_name = format!("write_{}", short_name);

    // One null slot between a short value and one that fills all 4 bytes.
    let column = BinaryArray::<i32>::from([Some(&b"aa"[..]), None, Some(&b"aaaa"[..])]);
    let expected = Chunk::new(vec![Box::new(column) as _]);
    let fields = vec![Field::new("a", DataType::Binary, true)];

    test(expected, fields, "VARBINARY(4)", &table_name)
}
4 changes: 3 additions & 1 deletion src/io/odbc/write/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ fn data_type_to(data_type: &DataType) -> Result<api::DataType> {
DataType::Int32 => api::DataType::Integer,
DataType::Float32 => api::DataType::Float { precision: 24 },
DataType::Float64 => api::DataType::Float { precision: 53 },
DataType::FixedSizeBinary(length) => api::DataType::Varbinary { length: *length },
DataType::FixedSizeBinary(length) => api::DataType::Binary { length: *length },
DataType::Binary | DataType::LargeBinary => api::DataType::Varbinary { length: 0 },
DataType::Utf8 | DataType::LargeUtf8 => api::DataType::LongVarchar { length: 0 },
other => return Err(ArrowError::nyi(format!("{other:?} to ODBC"))),
})
}
65 changes: 60 additions & 5 deletions src/io/odbc/write/serialize.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use api::buffers::BinColumnWriter;
use api::buffers::{BinColumnWriter, TextColumnWriter};

use crate::array::{Array, BooleanArray, FixedSizeBinaryArray, PrimitiveArray};
use crate::array::*;
use crate::bitmap::Bitmap;
use crate::datatypes::DataType;
use crate::error::{ArrowError, Result};
Expand Down Expand Up @@ -68,12 +68,44 @@ pub fn serialize(array: &dyn Array, column: &mut api::buffers::AnyColumnViewMut)
Err(ArrowError::nyi("serialize f64 to non-f64 ODBC"))
}
}
DataType::Utf8 => {
if let api::buffers::AnyColumnViewMut::Text(values) = column {
utf8::<i32>(array.as_any().downcast_ref().unwrap(), values);
Ok(())
} else {
Err(ArrowError::nyi("serialize utf8 to non-text ODBC"))
}
}
DataType::LargeUtf8 => {
if let api::buffers::AnyColumnViewMut::Text(values) = column {
utf8::<i64>(array.as_any().downcast_ref().unwrap(), values);
Ok(())
} else {
Err(ArrowError::nyi("serialize utf8 to non-text ODBC"))
}
}
DataType::Binary => {
if let api::buffers::AnyColumnViewMut::Binary(values) = column {
binary::<i32>(array.as_any().downcast_ref().unwrap(), values);
Ok(())
} else {
Err(ArrowError::nyi("serialize utf8 to non-binary ODBC"))
}
}
DataType::LargeBinary => {
if let api::buffers::AnyColumnViewMut::Binary(values) = column {
binary::<i64>(array.as_any().downcast_ref().unwrap(), values);
Ok(())
} else {
Err(ArrowError::nyi("serialize utf8 to non-text ODBC"))
}
}
DataType::FixedSizeBinary(_) => {
if let api::buffers::AnyColumnViewMut::Binary(values) = column {
binary(array.as_any().downcast_ref().unwrap(), values);
fixed_binary(array.as_any().downcast_ref().unwrap(), values);
Ok(())
} else {
Err(ArrowError::nyi("serialize f64 to non-f64 ODBC"))
Err(ArrowError::nyi("serialize fixed to non-binary ODBC"))
}
}
other => Err(ArrowError::nyi(format!("{other:?} to ODBC"))),
Expand Down Expand Up @@ -117,6 +149,29 @@ fn primitive_optional<T: NativeType>(array: &PrimitiveArray<T>, values: &mut Nul
write_validity(array.validity(), values.indicators());
}

fn binary(array: &FixedSizeBinaryArray, writer: &mut BinColumnWriter) {
/// Copies a [`FixedSizeBinaryArray`] into an ODBC binary column buffer.
///
/// The writer's maximum element length is set to the array's fixed size
/// before the slots are written.
fn fixed_binary(array: &FixedSizeBinaryArray, writer: &mut BinColumnWriter) {
    let width = array.size();
    writer.set_max_len(width);

    let slots = array.iter();
    writer.write(slots)
}

/// Copies a [`BinaryArray`] into an ODBC binary column buffer.
///
/// The writer's maximum element length is set to the largest difference
/// between consecutive offsets (0 when there are no such pairs) before the
/// slots are written.
fn binary<O: Offset>(array: &BinaryArray<O>, writer: &mut BinColumnWriter) {
    // Each pair of consecutive offsets delimits one slot; keep the widest.
    let longest = array
        .offsets()
        .windows(2)
        .fold(0usize, |widest, pair| {
            widest.max((pair[1] - pair[0]).to_usize())
        });

    writer.set_max_len(longest);
    writer.write(array.iter())
}

/// Copies a [`Utf8Array`] into an ODBC text column buffer as raw bytes.
///
/// The writer's maximum element length is set to the largest difference
/// between consecutive offsets (0 when there are no such pairs) before the
/// slots are written.
fn utf8<O: Offset>(array: &Utf8Array<O>, writer: &mut TextColumnWriter<u8>) {
    // Each pair of consecutive offsets delimits one slot; keep the widest.
    let longest = array
        .offsets()
        .windows(2)
        .fold(0usize, |widest, pair| {
            widest.max((pair[1] - pair[0]).to_usize())
        });
    writer.set_max_len(longest);

    // The writer takes byte slices, so expose each present string as bytes.
    let slots = array.iter().map(|slot| slot.map(|text| text.as_bytes()));
    writer.write(slots)
}

0 comments on commit 606afd3

Please sign in to comment.