diff --git a/src/io/csv/read/deserialize.rs b/src/io/csv/read/deserialize.rs index 37a8a45f813..66d77a907c3 100644 --- a/src/io/csv/read/deserialize.rs +++ b/src/io/csv/read/deserialize.rs @@ -15,6 +15,7 @@ use super::super::read_utils::{ }; impl ByteRecordGeneric for ByteRecord { + #[inline] fn get(&self, index: usize) -> Option<&[u8]> { self.get(index) } diff --git a/src/io/csv/read_async/deserialize.rs b/src/io/csv/read_async/deserialize.rs index 41074b99844..aa4ce96097b 100644 --- a/src/io/csv/read_async/deserialize.rs +++ b/src/io/csv/read_async/deserialize.rs @@ -15,6 +15,7 @@ use super::super::read_utils::{ }; impl ByteRecordGeneric for ByteRecord { + #[inline] fn get(&self, index: usize) -> Option<&[u8]> { self.get(index) } diff --git a/src/io/csv/read_utils.rs b/src/io/csv/read_utils.rs index f741d7d4422..2ee25c85914 100644 --- a/src/io/csv/read_utils.rs +++ b/src/io/csv/read_utils.rs @@ -20,6 +20,12 @@ use crate::{ use super::utils::RFC3339; +#[inline] +fn to_utf8(bytes: &[u8]) -> Option<&str> { + simdutf8::basic::from_utf8(bytes).ok() +} + +#[inline] fn deserialize_primitive( rows: &[B], column: usize, @@ -86,6 +92,7 @@ fn deserialize_decimal(bytes: &[u8], precision: usize, scale: usize) -> Option(rows: &[B], column: usize, op: F) -> Arc where B: ByteRecordGeneric, @@ -103,14 +110,16 @@ where Arc::new(BooleanArray::from_trusted_len_iter(iter)) } +#[inline] fn deserialize_utf8(rows: &[B], column: usize) -> Arc { let iter = rows.iter().map(|row| match row.get(column) { - Some(bytes) => simdutf8::basic::from_utf8(bytes).ok(), + Some(bytes) => to_utf8(bytes), None => None, }); Arc::new(Utf8Array::::from_trusted_len_iter(iter)) } +#[inline] fn deserialize_binary( rows: &[B], column: usize, @@ -136,6 +145,7 @@ fn deserialize_datetime(string: &str, tz: &T) -> Option( rows: &[B], column: usize, @@ -184,36 +194,31 @@ pub(crate) fn deserialize_column( lexical_core::parse::(bytes).ok() }), Date32 => deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| x.parse::().ok()) .map(|x| x.num_days_from_ce() - temporal_conversions::EPOCH_DAYS_FROM_CE) }), Date64 => deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| x.parse::().ok()) .map(|x| x.timestamp_millis()) }), Timestamp(TimeUnit::Nanosecond, None) => { deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| x.parse::().ok()) .map(|x| x.timestamp_nanos()) }) } Timestamp(TimeUnit::Microsecond, None) => { deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| x.parse::().ok()) .map(|x| x.timestamp_nanos() / 1000) }) } Timestamp(time_unit, None) => deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| x.parse::().ok()) .map(|x| x.timestamp_nanos()) .map(|x| match time_unit { @@ -226,8 +231,7 @@ pub(crate) fn deserialize_column( Timestamp(time_unit, Some(ref tz)) => { let tz = temporal_conversions::parse_offset(tz)?; deserialize_primitive(rows, column, datatype, |bytes| { - simdutf8::basic::from_utf8(bytes) - .ok() + to_utf8(bytes) .and_then(|x| deserialize_datetime(x, &tz)) .map(|x| match time_unit { TimeUnit::Second => x / 1_000_000_000,