Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(expr): add Decimal128 and Decimal256 type #9856

Merged
merged 21 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ impl FromToProto for ex::TableDataType {
let x = n.to_pb()?;
new_pb_dt24(Dt24::NumberT(x))
}
TableDataType::Decimal(_) => todo!("decimal"),
andylokandy marked this conversation as resolved.
Show resolved Hide resolved
TableDataType::Timestamp => new_pb_dt24(Dt24::TimestampT(pb::Empty {})),
TableDataType::Date => new_pb_dt24(Dt24::DateT(pb::Empty {})),
TableDataType::Nullable(v) => {
Expand Down
1 change: 1 addition & 0 deletions src/query/expression/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ common-jsonb = { path = "../../common/jsonb" }
educe = "0.4"
enum-as-inner = "0.5"
enum_dispatch = "0.3.8"
ethnum = { version = "1.3", features = ["serde"] }
andylokandy marked this conversation as resolved.
Show resolved Hide resolved
futures = "0.3.24"
hex = "0.4.3"
itertools = "0.10"
Expand Down
1 change: 1 addition & 0 deletions src/query/expression/src/converts/to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub fn scalar_to_datavalue(scalar: &Scalar) -> DataValue {
}
crate::types::number::NumberScalar::Float64(x) => DataValue::Float64((*x).into()),
},
Scalar::Decimal(_) => unimplemented!("decimal is unsupported"),
Scalar::Timestamp(x) => DataValue::Int64(*x),
Scalar::Date(x) => DataValue::Int64(*x as i64),
Scalar::Boolean(x) => DataValue::Boolean(*x),
Expand Down
17 changes: 17 additions & 0 deletions src/query/expression/src/kernels/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use common_exception::Result;
use itertools::Itertools;

use crate::types::array::ArrayColumnBuilder;
use crate::types::decimal::DecimalColumn;
use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::string::StringColumnBuilder;
Expand All @@ -33,6 +34,7 @@ use crate::types::StringType;
use crate::types::TimestampType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::with_decimal_type;
use crate::with_number_mapped_type;
use crate::BlockEntry;
use crate::Column;
Expand Down Expand Up @@ -104,6 +106,21 @@ impl Column {
Self::concat_arg_types::<NumberType<NUM_TYPE>>(columns)
}
}),
// Concatenate decimal columns: append the raw values of every input column
// into one buffer and reuse the decimal size (precision/scale) taken from
// the first column.
Column::Decimal(col) => with_decimal_type!(|DECIMAL_TYPE| match col {
    DecimalColumn::DECIMAL_TYPE(_, size) => {
        let mut builder = Vec::with_capacity(capacity);
        for c in columns {
            match c {
                // Bind the per-column size under a distinct name: re-using
                // `size` here would shadow the outer binding and turn the
                // check below into a tautology.
                Column::Decimal(DecimalColumn::DECIMAL_TYPE(col, col_size)) => {
                    // All concatenated columns are expected to share the
                    // same decimal size as the first one.
                    debug_assert_eq!(size, col_size);
                    builder.extend_from_slice(col);
                }
                // A column of a different variant or decimal width is
                // treated as impossible by this arm.
                _ => unreachable!(),
            }
        }
        Column::Decimal(DecimalColumn::DECIMAL_TYPE(builder.into(), *size))
    }
}),
Column::Boolean(_) => Self::concat_arg_types::<BooleanType>(columns),
Column::String(_) => {
let data_capacity = columns.iter().map(|c| c.memory_size() - c.len() * 8).sum();
Expand Down
10 changes: 10 additions & 0 deletions src/query/expression/src/kernels/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use common_exception::Result;

use crate::filter_helper::FilterHelpers;
use crate::types::array::ArrayColumnBuilder;
use crate::types::decimal::DecimalColumn;
use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::string::StringColumnBuilder;
Expand All @@ -31,6 +32,7 @@ use crate::types::BooleanType;
use crate::types::StringType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::with_decimal_type;
use crate::with_number_type;
use crate::BlockEntry;
use crate::Column;
Expand Down Expand Up @@ -115,6 +117,14 @@ impl Column {
)))
}
}),
Column::Decimal(column) => with_decimal_type!(|DECIMAL_TYPE| match column {
DecimalColumn::DECIMAL_TYPE(values, size) => {
Column::Decimal(DecimalColumn::DECIMAL_TYPE(
Self::filter_primitive_types(values, filter),
*size,
))
}
}),
Column::Boolean(bm) => Self::filter_scalar_types::<BooleanType>(
bm,
MutableBitmap::with_capacity(length),
Expand Down
1 change: 1 addition & 0 deletions src/query/expression/src/kernels/group_by_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ pub fn serialize_column_binary(column: &Column, row: usize, vec: &mut Vec<u8>) {
Column::String(v) => {
BinaryWrite::write_binary(vec, unsafe { v.index_unchecked(row) }).unwrap()
}
Column::Decimal(_) => todo!("decimal"),
Column::Timestamp(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()),
Column::Date(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()),
Column::Array(array) => {
Expand Down
17 changes: 17 additions & 0 deletions src/query/expression/src/kernels/scatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

use common_arrow::arrow::bitmap::MutableBitmap;
use common_exception::Result;
use itertools::Itertools;

use crate::types::array::ArrayColumnBuilder;
use crate::types::decimal::DecimalColumn;
use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::string::StringColumnBuilder;
Expand All @@ -29,6 +31,7 @@ use crate::types::StringType;
use crate::types::TimestampType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::with_decimal_type;
use crate::with_number_mapped_type;
use crate::BlockEntry;
use crate::Column;
Expand Down Expand Up @@ -137,6 +140,20 @@ impl Column {
scatter_size
),
}),
Column::Decimal(column) => with_decimal_type!(|DECIMAL_TYPE| match column {
DecimalColumn::DECIMAL_TYPE(values, size) => {
let mut builder = (0..scatter_size)
.map(|_| Vec::with_capacity(length))
.collect_vec();
for (index, item) in indices.iter().zip(values.iter()) {
builder[index.to_usize()].push(*item);
}
builder
.into_iter()
.map(|v| Column::Decimal(DecimalColumn::DECIMAL_TYPE(v.into(), *size)))
.collect()
}
}),
Column::EmptyArray { .. } => Self::scatter_repeat_scalars::<I>(
&Scalar::EmptyArray,
data_type,
Expand Down
12 changes: 12 additions & 0 deletions src/query/expression/src/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
// limitations under the License.

use common_exception::Result;
use itertools::Itertools;

use crate::types::array::ArrayColumnBuilder;
use crate::types::decimal::DecimalColumn;
use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::AnyType;
Expand All @@ -25,6 +27,7 @@ use crate::types::NumberType;
use crate::types::StringType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::with_decimal_type;
use crate::with_number_mapped_type;
use crate::BlockEntry;
use crate::Column;
Expand Down Expand Up @@ -69,6 +72,15 @@ impl Column {
NumberColumn::NUM_TYPE(values) =>
Self::take_arg_types::<NumberType<NUM_TYPE>, _>(values, indices),
}),
Column::Decimal(column) => with_decimal_type!(|DECIMAL_TYPE| match column {
DecimalColumn::DECIMAL_TYPE(values, size) => {
let builder = indices
.iter()
.map(|index| unsafe { *values.get_unchecked(index.to_usize()) })
.collect_vec();
Column::Decimal(DecimalColumn::DECIMAL_TYPE(builder.into(), *size))
}
}),
Column::Boolean(bm) => Self::take_arg_types::<BooleanType, _>(bm, indices),
Column::String(column) => Self::take_arg_types::<StringType, _>(column, indices),
Column::Timestamp(column) => {
Expand Down
37 changes: 30 additions & 7 deletions src/query/expression/src/kernels/take_chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use common_arrow::arrow::compute::merge_sort::MergeSlice;
use itertools::Itertools;

use crate::types::array::ArrayColumnBuilder;
use crate::types::decimal::DecimalColumn;
use crate::types::nullable::NullableColumn;
use crate::types::number::NumberColumn;
use crate::types::AnyType;
Expand All @@ -29,6 +30,7 @@ use crate::types::StringType;
use crate::types::TimestampType;
use crate::types::ValueType;
use crate::types::VariantType;
use crate::with_decimal_type;
use crate::with_number_mapped_type;
use crate::BlockEntry;
use crate::Column;
Expand All @@ -38,7 +40,7 @@ use crate::Scalar;
use crate::TypeDeserializer;
use crate::Value;

// Chunk idx, row idx in the block, times
// Block idx, row idx in the block, repeat times
pub type BlockRowIndex = (usize, usize, usize);

impl DataBlock {
Expand Down Expand Up @@ -198,6 +200,25 @@ impl Column {
Self::take_block_value_types::<NumberType<NUM_TYPE>>(columns, builder, indices)
}
}),
Column::Decimal(column) => with_decimal_type!(|DECIMAL_TYPE| match column {
DecimalColumn::DECIMAL_TYPE(_, size) => {
let columns = columns
.iter()
.map(|col| match col {
Column::Decimal(DecimalColumn::DECIMAL_TYPE(col, _)) => col,
_ => unreachable!(),
})
.collect_vec();
let mut builder = Vec::with_capacity(result_size);
for &(block_index, row, times) in indices {
let val = unsafe { columns[block_index].get_unchecked(row) };
for _ in 0..times {
builder.push(*val);
}
}
Column::Decimal(DecimalColumn::DECIMAL_TYPE(builder.into(), *size))
}
}),
Column::Boolean(_) => {
let builder = BooleanType::create_builder(result_size, &[]);
Self::take_block_value_types::<BooleanType>(columns, builder, indices)
Expand Down Expand Up @@ -303,12 +324,14 @@ impl Column {
mut builder: T::ColumnBuilder,
indices: &[BlockRowIndex],
) -> Column {
unsafe {
for &(block_index, row, times) in indices {
let col = T::try_downcast_column(&columns[block_index]).unwrap();
for _ in 0..times {
T::push_item(&mut builder, T::index_column_unchecked(&col, row))
}
let columns = columns
.iter()
.map(|col| T::try_downcast_column(col).unwrap())
.collect_vec();
for &(block_index, row, times) in indices {
let val = unsafe { T::index_column_unchecked(&columns[block_index], row) };
for _ in 0..times {
T::push_item(&mut builder, val.clone())
}
}
T::upcast_column(T::build_column(builder))
Expand Down
3 changes: 3 additions & 0 deletions src/query/expression/src/property.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use enum_as_inner::EnumAsInner;

use crate::types::boolean::BooleanDomain;
use crate::types::decimal::DecimalDomain;
use crate::types::nullable::NullableDomain;
use crate::types::number::NumberDomain;
use crate::types::number::NumberScalar;
Expand Down Expand Up @@ -67,6 +68,7 @@ pub enum FunctionDomain<T: ValueType> {
#[derive(Debug, Clone, PartialEq, EnumAsInner)]
pub enum Domain {
Number(NumberDomain),
Decimal(DecimalDomain),
Boolean(BooleanDomain),
String(StringDomain),
Timestamp(SimpleDomain<i64>),
Expand Down Expand Up @@ -137,6 +139,7 @@ impl Domain {
DataType::Number(NumberDataType::Float64) => {
Domain::Number(NumberDomain::Float64(NumberType::<F64>::full_domain()))
}
DataType::Decimal(_) => todo!("decimal"),
DataType::Timestamp => Domain::Timestamp(TimestampType::full_domain()),
DataType::Date => Domain::Date(DateType::full_domain()),
DataType::Null => Domain::Nullable(NullableDomain {
Expand Down
22 changes: 22 additions & 0 deletions src/query/expression/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ use serde::Serialize;
use crate::types::array::ArrayColumn;
use crate::types::date::DATE_MAX;
use crate::types::date::DATE_MIN;
use crate::types::decimal::DecimalDataType;
use crate::types::decimal::DecimalSize;
use crate::types::nullable::NullableColumn;
use crate::types::timestamp::TIMESTAMP_MAX;
use crate::types::timestamp::TIMESTAMP_MIN;
Expand Down Expand Up @@ -104,6 +106,7 @@ pub enum TableDataType {
Boolean,
String,
Number(NumberDataType),
Decimal(DecimalDataType),
Timestamp,
Date,
Nullable(Box<TableDataType>),
Expand Down Expand Up @@ -818,6 +821,7 @@ impl From<&TableDataType> for DataType {
TableDataType::Boolean => DataType::Boolean,
TableDataType::String => DataType::String,
TableDataType::Number(ty) => DataType::Number(*ty),
TableDataType::Decimal(ty) => DataType::Decimal(*ty),
TableDataType::Timestamp => DataType::Timestamp,
TableDataType::Date => DataType::Date,
TableDataType::Nullable(ty) => DataType::Nullable(Box::new((&**ty).into())),
Expand Down Expand Up @@ -954,6 +958,7 @@ impl TableDataType {
),
})),
},
TableDataType::Decimal(_) => todo!("decimal"),
TableDataType::Timestamp => BlockEntry {
data_type: DataType::Timestamp,
value: Value::Column(TimestampType::from_data(
Expand Down Expand Up @@ -1151,6 +1156,17 @@ impl From<&ArrowField> for TableDataType {
let ty = with_number_type!(|TYPE| match f.data_type() {
ArrowDataType::TYPE => TableDataType::Number(NumberDataType::TYPE),

ArrowDataType::Decimal(precision, scale) =>
TableDataType::Decimal(DecimalDataType::Decimal128(DecimalSize {
precision: *precision as u8,
scale: *scale as u8,
})),
ArrowDataType::Decimal256(precision, scale) =>
TableDataType::Decimal(DecimalDataType::Decimal256(DecimalSize {
precision: *precision as u8,
scale: *scale as u8,
})),

ArrowDataType::Null => return TableDataType::Null,
ArrowDataType::Boolean => TableDataType::Boolean,

Expand Down Expand Up @@ -1277,6 +1293,12 @@ impl From<&TableDataType> for ArrowDataType {
TableDataType::Number(ty) => with_number_type!(|TYPE| match ty {
NumberDataType::TYPE => ArrowDataType::TYPE,
}),
TableDataType::Decimal(DecimalDataType::Decimal128(size)) => {
ArrowDataType::Decimal(size.precision as usize, size.scale as usize)
}
TableDataType::Decimal(DecimalDataType::Decimal256(size)) => {
ArrowDataType::Decimal256(size.precision as usize, size.scale as usize)
}
TableDataType::Timestamp => ArrowDataType::Timestamp(TimeUnit::Microsecond, None),
TableDataType::Date => ArrowDataType::Date32,
TableDataType::Nullable(ty) => ty.as_ref().into(),
Expand Down
2 changes: 1 addition & 1 deletion src/query/expression/src/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ pub fn common_super_type(ty1: DataType, ty2: DataType) -> Option<DataType> {
Some(DataType::Array(Box::new(common_super_type(ty1, ty2)?)))
}
(DataType::Number(num1), DataType::Number(num2)) => {
Some(DataType::Number(num1.lossful_super_type(num2)))
Some(DataType::Number(num1.super_type(num2)))
}

(DataType::String, DataType::Timestamp) | (DataType::Timestamp, DataType::String) => {
Expand Down
Loading