From 81ec49c803b74b4a57c5e9158126d8f1ec12b7c5 Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Thu, 26 Aug 2021 09:14:09 +0100 Subject: [PATCH] Improved perf of reading integers from json. (#340) --- src/io/json/read/deserialize.rs | 36 +++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/io/json/read/deserialize.rs b/src/io/json/read/deserialize.rs index 53811f19022..f06b51b7132 100644 --- a/src/io/json/read/deserialize.rs +++ b/src/io/json/read/deserialize.rs @@ -75,7 +75,19 @@ fn build_extract(data_type: &DataType) -> Extract { } } -fn read_primitive( +fn read_int( + rows: &[&Value], + data_type: DataType, +) -> PrimitiveArray { + let iter = rows.iter().map(|row| match row { + Value::Number(number) => number.as_i64().and_then(num::cast::cast::), + Value::Bool(number) => num::cast::cast::(*number as i32), + _ => None, + }); + PrimitiveArray::from_trusted_len_iter(iter).to(data_type) +} + +fn read_float( rows: &[&Value], data_type: DataType, ) -> PrimitiveArray { @@ -214,14 +226,12 @@ pub fn read(rows: &[&Value], data_type: DataType) -> Arc { match &data_type { DataType::Null => Arc::new(NullArray::from_data(rows.len())), DataType::Boolean => Arc::new(read_boolean(rows)), - DataType::Int8 => Arc::new(read_primitive::(rows, data_type)), - DataType::Int16 => Arc::new(read_primitive::(rows, data_type)), + DataType::Int8 => Arc::new(read_int::(rows, data_type)), + DataType::Int16 => Arc::new(read_int::(rows, data_type)), DataType::Int32 | DataType::Date32 | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { - Arc::new(read_primitive::(rows, data_type)) - } + | DataType::Interval(IntervalUnit::YearMonth) => Arc::new(read_int::(rows, data_type)), DataType::Interval(IntervalUnit::DayTime) => { unimplemented!("There is no natural representation of DayTime in JSON.") } @@ -229,14 +239,14 @@ pub fn read(rows: &[&Value], data_type: DataType) -> Arc { | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) => Arc::new(read_primitive::(rows, data_type)), - DataType::UInt8 => Arc::new(read_primitive::(rows, data_type)), - DataType::UInt16 => Arc::new(read_primitive::(rows, data_type)), - DataType::UInt32 => Arc::new(read_primitive::(rows, data_type)), - DataType::UInt64 => Arc::new(read_primitive::(rows, data_type)), + | DataType::Duration(_) => Arc::new(read_int::(rows, data_type)), + DataType::UInt8 => Arc::new(read_int::(rows, data_type)), + DataType::UInt16 => Arc::new(read_int::(rows, data_type)), + DataType::UInt32 => Arc::new(read_int::(rows, data_type)), + DataType::UInt64 => Arc::new(read_int::(rows, data_type)), DataType::Float16 => unreachable!(), - DataType::Float32 => Arc::new(read_primitive::(rows, data_type)), - DataType::Float64 => Arc::new(read_primitive::(rows, data_type)), + DataType::Float32 => Arc::new(read_float::(rows, data_type)), + DataType::Float64 => Arc::new(read_float::(rows, data_type)), DataType::Utf8 => Arc::new(read_utf8::(rows)), DataType::LargeUtf8 => Arc::new(read_utf8::(rows)), DataType::List(_) => Arc::new(read_list::(rows, data_type)),