Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(decimal): add operation for decimal #9926

Merged
merged 17 commits into from
Feb 10, 2023
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 81 additions & 1 deletion src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs
Original file line number Diff line number Diff line change
@@ -206,6 +206,9 @@ impl FromToProto for ex::TableDataType {
}
}
Dt24::VariantT(_) => ex::TableDataType::Variant,
Dt24::DecimalT(x) => {
ex::TableDataType::Decimal(ex::types::decimal::DecimalDataType::from_pb(x)?)
}
};
Ok(x)
}
@@ -226,7 +229,10 @@ impl FromToProto for ex::TableDataType {
let x = n.to_pb()?;
new_pb_dt24(Dt24::NumberT(x))
}
TableDataType::Decimal(_) => unimplemented!("decimal type is not supported"),
TableDataType::Decimal(n) => {
let x = n.to_pb()?;
new_pb_dt24(Dt24::DecimalT(x))
}
TableDataType::Timestamp => new_pb_dt24(Dt24::TimestampT(pb::Empty {})),
TableDataType::Date => new_pb_dt24(Dt24::DateT(pb::Empty {})),
TableDataType::Nullable(v) => {
@@ -322,6 +328,80 @@ impl FromToProto for ex::types::NumberDataType {
}
}

/// Protobuf conversion for the decimal data type.
///
/// The 128-bit and 256-bit variants map one-to-one onto the members of the
/// `pb::Decimal.decimal` oneof; each carries a `DecimalSize` payload.
impl FromToProto for ex::types::DecimalDataType {
    type PB = pb::Decimal;

    /// Returns the version recorded in the message.
    fn get_pb_ver(p: &Self::PB) -> u64 {
        p.ver
    }

    /// Decodes a `pb::Decimal`.
    ///
    /// Fails with `Incompatible` when the reader version check fails, when the
    /// oneof is unset, or when the nested `DecimalSize` cannot be decoded.
    fn from_pb(p: pb::Decimal) -> Result<Self, Incompatible> {
        reader_check_msg(p.ver, p.min_reader_ver)?;

        // An unset oneof carries no width information, so it cannot be decoded.
        let variant = p.decimal.ok_or_else(|| Incompatible {
            reason: "Invalid Decimal: .decimal can not be None".to_string(),
        })?;

        Ok(match variant {
            pb::decimal::Decimal::Decimal128(size) => {
                Self::Decimal128(ex::types::decimal::DecimalSize::from_pb(size)?)
            }
            pb::decimal::Decimal::Decimal256(size) => {
                Self::Decimal256(ex::types::decimal::DecimalSize::from_pb(size)?)
            }
        })
    }

    /// Encodes this decimal type, stamping the message with `VER` and
    /// `MIN_READER_VER`.
    fn to_pb(&self) -> Result<pb::Decimal, Incompatible> {
        let decimal = match self {
            Self::Decimal128(size) => pb::decimal::Decimal::Decimal128(size.to_pb()?),
            Self::Decimal256(size) => pb::decimal::Decimal::Decimal256(size.to_pb()?),
        };

        Ok(pb::Decimal {
            ver: VER,
            min_reader_ver: MIN_READER_VER,

            decimal: Some(decimal),
        })
    }
}

impl FromToProto for ex::types::decimal::DecimalSize {
type PB = pb::DecimalSize;

fn get_pb_ver(p: &Self::PB) -> u64 {
p.ver
}

fn from_pb(p: Self::PB) -> Result<Self, Incompatible>
where Self: Sized {
reader_check_msg(p.ver, p.min_reader_ver)?;
Ok(ex::types::decimal::DecimalSize {
precision: p.precision as u8,
scale: p.scale as u8,
})
}

fn to_pb(&self) -> Result<Self::PB, Incompatible> {
Ok(pb::DecimalSize {
ver: VER,
min_reader_ver: MIN_READER_VER,
precision: self.precision as i32,
scale: self.scale as i32,
})
}
}

/// Create a pb::DataType with version-24 data type schema
fn new_pb_dt24(dt24: Dt24) -> pb::DataType {
pb::DataType {
1 change: 1 addition & 0 deletions src/meta/proto-conv/src/util.rs
Original file line number Diff line number Diff line change
@@ -85,6 +85,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
26,
"2023-01-16: Add: metadata.proto/DataSchema::next_column_id",
),
(27, "2023-02-10: Add: metadata.proto/DataType Decimal types"),
// Dear developer:
// If you're gonna add a new metadata version, you'll have to add a test for it.
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)
21 changes: 21 additions & 0 deletions src/meta/protos/proto/datatype.proto
Original file line number Diff line number Diff line change
@@ -60,6 +60,7 @@ message DataType {
DataType map_t = 40;
Tuple tuple_t = 41;
Empty variant_t = 42;
Decimal decimal_t = 43;
}
}

@@ -179,5 +180,25 @@ message Number {
}
}


// Decimal data type. Exactly one member of the `decimal` oneof is expected to
// be set; it selects the 128-bit or 256-bit variant and carries its size.
message Decimal {
// Message version and the minimum reader version able to decode it.
uint64 ver = 100;
uint64 min_reader_ver = 101;

// The Rust decoder rejects a message in which this oneof is unset.
oneof decimal {
DecimalSize decimal_128 = 1;
DecimalSize decimal_256 = 2;
}
}

// Precision and scale of a decimal type.
message DecimalSize {
// Message version and the minimum reader version able to decode it.
uint64 ver = 100;
uint64 min_reader_ver = 101;

// Stored as int32 on the wire; the Rust side keeps both values as u8.
int32 precision = 1;
int32 scale = 2;
}


// Place holder type for primitive types
message Empty {}
26 changes: 22 additions & 4 deletions src/query/expression/src/function.rs
Original file line number Diff line number Diff line change
@@ -145,6 +145,26 @@ pub struct Function {
pub eval: Box<dyn Fn(&[ValueRef<AnyType>], &mut EvalContext) -> Value<AnyType> + Send + Sync>,
}

impl Function {
    /// Consumes this function and returns its nullable counterpart.
    ///
    /// Every argument type and the return type are passed through
    /// `wrap_nullable()`; the name and property are copied unchanged. The
    /// original `calc_domain` is discarded in favour of one that always
    /// reports `FunctionDomain::Full`, and `eval` is wrapped by the free
    /// function `wrap_nullable`.
    pub fn wrap_nullable(self) -> Self {
        let nullable_args = self
            .signature
            .args_type
            .iter()
            .map(|arg_ty| arg_ty.wrap_nullable())
            .collect();

        let signature = FunctionSignature {
            name: self.signature.name.clone(),
            args_type: nullable_args,
            return_type: self.signature.return_type.wrap_nullable(),
            property: self.signature.property.clone(),
        };

        Self {
            signature,
            calc_domain: Box::new(|_| FunctionDomain::Full),
            // Resolves to the free function `wrap_nullable`, not this method.
            eval: Box::new(wrap_nullable(self.eval)),
        }
    }
}

#[derive(Default)]
pub struct FunctionRegistry {
pub funcs: HashMap<String, Vec<Arc<Function>>>,
@@ -278,10 +298,8 @@ impl FunctionRegistry {
}
}

pub fn wrap_nullable<F>(
f: F,
) -> impl Fn(&[ValueRef<AnyType>], &mut EvalContext) -> Value<AnyType> + Copy
where F: Fn(&[ValueRef<AnyType>], &mut EvalContext) -> Value<AnyType> + Copy {
pub fn wrap_nullable<F>(f: F) -> impl Fn(&[ValueRef<AnyType>], &mut EvalContext) -> Value<AnyType>
where F: Fn(&[ValueRef<AnyType>], &mut EvalContext) -> Value<AnyType> {
move |args, ctx| {
type T = NullableType<AnyType>;
type Result = AnyType;
2 changes: 1 addition & 1 deletion src/query/expression/src/kernels/group_by_hash.rs
Original file line number Diff line number Diff line change
@@ -490,7 +490,7 @@ pub fn serialize_column_binary(column: &Column, row: usize, vec: &mut Vec<u8>) {
Column::String(v) => {
BinaryWrite::write_binary(vec, unsafe { v.index_unchecked(row) }).unwrap()
}
Column::Decimal(_) => todo!("decimal"),
Column::Decimal(_) => unreachable!("Decimal is not supported in group by keys format"),
Column::Timestamp(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()),
Column::Date(v) => vec.extend_from_slice(v[row].to_le_bytes().as_ref()),
Column::Array(array) => {
23 changes: 22 additions & 1 deletion src/query/expression/src/property.rs
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
// limitations under the License.

use enum_as_inner::EnumAsInner;
use ethnum::i256;

use crate::types::boolean::BooleanDomain;
use crate::types::decimal::DecimalDomain;
@@ -139,7 +140,27 @@ impl Domain {
DataType::Number(NumberDataType::Float64) => {
Domain::Number(NumberDomain::Float64(NumberType::<F64>::full_domain()))
}
DataType::Decimal(_) => todo!("decimal"),
// Placeholder full-range domain: min/max indexes are not supported for the decimal type.
DataType::Decimal(x) => match x {
crate::types::DecimalDataType::Decimal128(x) => {
Domain::Decimal(DecimalDomain::Decimal128(
SimpleDomain {
min: i128::MIN,
max: i128::MAX,
},
*x,
))
}
crate::types::DecimalDataType::Decimal256(x) => {
Domain::Decimal(DecimalDomain::Decimal256(
SimpleDomain {
min: i256::MIN,
max: i256::MAX,
},
*x,
))
}
},
DataType::Timestamp => Domain::Timestamp(TimestampType::full_domain()),
DataType::Date => Domain::Date(DateType::full_domain()),
DataType::Null => Domain::Nullable(NullableDomain {
23 changes: 21 additions & 2 deletions src/query/expression/src/schema.rs
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@ use common_exception::Result;
use common_jsonb::Number as JsonbNumber;
use common_jsonb::Object as JsonbObject;
use common_jsonb::Value as JsonbValue;
use ethnum::i256;
use itertools::Itertools;
use rand::distributions::Alphanumeric;
use rand::distributions::DistString;
@@ -39,6 +40,7 @@ use serde::Serialize;
use crate::types::array::ArrayColumn;
use crate::types::date::DATE_MAX;
use crate::types::date::DATE_MIN;
use crate::types::decimal::DecimalColumn;
use crate::types::decimal::DecimalDataType;
use crate::types::decimal::DecimalSize;
use crate::types::nullable::NullableColumn;
@@ -958,7 +960,23 @@ impl TableDataType {
),
})),
},
TableDataType::Decimal(_) => todo!("decimal"),
// Placeholder zero-filled decimal column; not used for now.
TableDataType::Decimal(t) => match t {
DecimalDataType::Decimal128(x) => BlockEntry {
data_type: DataType::Decimal(*t),
value: Value::Column(Column::Decimal(DecimalColumn::Decimal128(
vec![0i128; len].into(),
*x,
))),
},
DecimalDataType::Decimal256(x) => BlockEntry {
data_type: DataType::Decimal(*t),
value: Value::Column(Column::Decimal(DecimalColumn::Decimal256(
vec![i256::ZERO; len].into(),
*x,
))),
},
},
TableDataType::Timestamp => BlockEntry {
data_type: DataType::Timestamp,
value: Value::Column(TimestampType::from_data(
@@ -1353,6 +1371,7 @@ pub fn infer_schema_type(data_type: &DataType) -> Result<TableDataType> {
DataType::String => Ok(TableDataType::String),
DataType::Number(number_type) => Ok(TableDataType::Number(*number_type)),
DataType::Timestamp => Ok(TableDataType::Timestamp),
DataType::Decimal(x) => Ok(TableDataType::Decimal(*x)),
DataType::Date => Ok(TableDataType::Date),
DataType::Nullable(inner_type) => Ok(TableDataType::Nullable(Box::new(infer_schema_type(
inner_type,
@@ -1379,7 +1398,7 @@ pub fn infer_schema_type(data_type: &DataType) -> Result<TableDataType> {
fields_type,
})
}
_ => Err(ErrorCode::SemanticError(format!(
DataType::Generic(_) => Err(ErrorCode::SemanticError(format!(
"Cannot create table with type: {}",
data_type
))),
20 changes: 17 additions & 3 deletions src/query/expression/src/type_check.rs
Original file line number Diff line number Diff line change
@@ -146,7 +146,14 @@ pub fn check_cast<Index: ColumnIndex>(
} else {
// fast path to eval function for cast
if let Some(cast_fn) = get_simple_cast_function(is_try, dest_type) {
if let Ok(cast_expr) = check_function(span, &cast_fn, &[], &[expr.clone()], fn_registry)
let params = if let DataType::Decimal(ty) = dest_type {
vec![ty.precision() as usize, ty.scale() as usize]
} else {
vec![]
};

if let Ok(cast_expr) =
check_function(span, &cast_fn, &params, &[expr.clone()], fn_registry)
{
if cast_expr.data_type() == &wrapped_dest_type {
return Ok(cast_expr);
@@ -330,6 +337,7 @@ pub fn try_check_function<Index: ColumnIndex>(
.zip(&sig.args_type)
.map(|(src_ty, dest_ty)| unify(src_ty, dest_ty, additional_rules))
.collect::<Result<Vec<_>>>()?;

let subst = substs
.into_iter()
.try_reduce(|subst1, subst2| subst1.merge(subst2))?
@@ -430,6 +438,8 @@ pub fn can_auto_cast_to(src_ty: &DataType, dest_ty: &DataType) -> bool {
|| src_num_ty.can_lossless_cast_to(*dest_num_ty)
}

(DataType::Number(_) | DataType::Decimal(_), DataType::Decimal(_)) => true,

// Note: these rules are commented out because `select 'str' -1` would be auto-transformed into: `minus(CAST('str' AS Date), CAST(1 AS Int64))`
// (DataType::String, DataType::Date) => true,
// (DataType::String, DataType::Timestamp) => true,
@@ -477,7 +487,10 @@ pub fn common_super_type(ty1: DataType, ty2: DataType) -> Option<DataType> {
}

pub fn get_simple_cast_function(is_try: bool, dest_type: &DataType) -> Option<String> {
let function_name = format!("to_{}", dest_type.to_string().to_lowercase());
let mut function_name = format!("to_{}", dest_type.to_string().to_lowercase());
if dest_type.is_decimal() {
function_name = "to_decimal".to_owned();
}

if is_simple_cast_function(&function_name) {
let prefix = if is_try { "try_" } else { "" };
@@ -488,7 +501,7 @@ pub fn get_simple_cast_function(is_try: bool, dest_type: &DataType) -> Option<St
}

pub fn is_simple_cast_function(name: &str) -> bool {
const SIMPLE_CAST_FUNCTIONS: &[&str; 15] = &[
const SIMPLE_CAST_FUNCTIONS: &[&str; 16] = &[
"to_string",
"to_uint8",
"to_uint16",
@@ -504,6 +517,7 @@ pub fn is_simple_cast_function(name: &str) -> bool {
"to_date",
"to_variant",
"to_boolean",
"to_decimal",
];
SIMPLE_CAST_FUNCTIONS.contains(&name)
}
6 changes: 5 additions & 1 deletion src/query/expression/src/types.rs
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@ pub use self::any::AnyType;
pub use self::array::ArrayType;
pub use self::boolean::BooleanType;
pub use self::date::DateType;
use self::decimal::DecimalDataType;
pub use self::decimal::DecimalDataType;
pub use self::empty_array::EmptyArrayType;
pub use self::generic::GenericType;
pub use self::map::MapType;
@@ -144,6 +144,10 @@ impl DataType {
}
}

pub fn is_decimal(&self) -> bool {
matches!(self, DataType::Decimal(_ty))
}

#[inline]
pub fn is_date_or_date_time(&self) -> bool {
matches!(self, DataType::Timestamp | DataType::Date)
Loading