apache · Weijun-H · Dec 11, 2023 · Dec 11, 2023 · Dec 15, 2023 · Dec 15, 2023
diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
@@ -39,6 +39,7 @@ unicode_expressions = []
 [dependencies]
 arrow = { workspace = true }
 arrow-schema = { workspace = true }
+bigdecimal = { workspace = true }
 datafusion-common = { workspace = true }
 datafusion-expr = { workspace = true }
 log = { workspace = true }

diff --git a/datafusion/sql/src/expr/value.rs b/datafusion/sql/src/expr/value.rs
@@ -17,8 +17,9 @@
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
-use arrow::datatypes::DECIMAL128_MAX_PRECISION;
-use arrow_schema::DataType;
+use arrow::datatypes::{ArrowNativeTypeOp, DECIMAL128_MAX_PRECISION};
+use arrow_schema::{DataType, DECIMAL128_MAX_SCALE};
+use bigdecimal::BigDecimal;
 use datafusion_common::{
     not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue,
 };
@@ -30,6 +31,7 @@ use log::debug;
 use sqlparser::ast::{BinaryOperator, Expr as SQLExpr, Interval, Value};
 use sqlparser::parser::ParserError::ParserError;
 use std::borrow::Cow;
+use std::str::FromStr;
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     pub(crate) fn parse_value(
@@ -405,39 +407,48 @@ const fn try_decode_hex_char(c: u8) -> Option<u8> {
 }
 
 /// Parse Decimal128 from a string
-///
-/// TODO: support parsing from scientific notation
 fn parse_decimal_128(unsigned_number: &str, negative: bool) -> Result<Expr> {
-    // remove leading zeroes
-    let trimmed = unsigned_number.trim_start_matches('0');
-    // parse precision and scale, remove decimal point if exists
-    let (precision, scale, replaced_str) = if trimmed == "." {
-        // special cases for numbers such as “0.”, “000.”, and so on.
-        (1, 0, Cow::Borrowed("0"))
-    } else if let Some(i) = trimmed.find('.') {
+    let big_decimal = BigDecimal::from_str(unsigned_number).map_err(|e| {
+        DataFusionError::from(ParserError(format!(
+            "Cannot parse {unsigned_number} into BigDecimal when building decimal: {e}"
+        )))
+    })?;
+
+    let replaced_str = big_decimal.to_string().replace('.', "");
+
+    let (precision, scale) = if big_decimal.fractional_digit_count() < 0 {
         (
-            trimmed.len() - 1,
-            trimmed.len() - i - 1,
-            Cow::Owned(trimmed.replace('.', "")),
+            (replaced_str.len() as i64)
+                .add_checked(big_decimal.fractional_digit_count())? as u64,
+            big_decimal.fractional_digit_count(),
         )
     } else {
-        // no decimal point, keep as is
-        (trimmed.len(), 0, Cow::Borrowed(trimmed))
+        (
+            replaced_str.len() as u64,
+            big_decimal.fractional_digit_count(),
+        )
     };
 
+    // check precision overflow
+    if precision > DECIMAL128_MAX_PRECISION as u64 {
+        return Err(DataFusionError::from(ParserError(format!(
+                    "Cannot parse {unsigned_number} as i128 when building decimal: precision overflow"
+                ))));
+    }
+
+    // check scale overflow
+    if scale < -DECIMAL128_MAX_SCALE as i64 || scale > DECIMAL128_MAX_SCALE as i64 {
+        return Err(DataFusionError::from(ParserError(format!(
+                    "Cannot parse {unsigned_number} as i128 when building decimal: scale overflow"
+                ))));
+    }
+
     let number = replaced_str.parse::<i128>().map_err(|e| {
         DataFusionError::from(ParserError(format!(
             "Cannot parse {replaced_str} as i128 when building decimal: {e}"
         )))
     })?;
 
-    // check precision overflow
-    if precision as u8 > DECIMAL128_MAX_PRECISION {
-        return Err(DataFusionError::from(ParserError(format!(
-            "Cannot parse {replaced_str} as i128 when building decimal: precision overflow"
-        ))));
-    }
-
     Ok(Expr::Literal(ScalarValue::Decimal128(
         Some(if negative { -number } else { number }),
         precision as u8,
@@ -468,4 +479,10 @@ mod tests {
             assert_eq!(output, expect);
         }
     }
+
+    #[test]
+    fn test_parse_128_decimal_number() {
+        let number = "1.23456e10";
+        parse_decimal_128(number, false).unwrap();
+    }
 }
diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
@@ -43,8 +43,8 @@ fn parse_decimals() {
     let test_data = [
         ("1", "Int64(1)"),
         ("001", "Int64(1)"),
-        ("0.1", "Decimal128(Some(1),1,1)"),
-        ("0.01", "Decimal128(Some(1),2,2)"),
+        ("0.1", "Decimal128(Some(1),2,1)"),
+        ("0.01", "Decimal128(Some(1),3,2)"),
         ("1.0", "Decimal128(Some(10),2,1)"),
         ("10.01", "Decimal128(Some(1001),4,2)"),
         (

diff --git a/datafusion/sqllogictest/test_files/options.slt b/datafusion/sqllogictest/test_files/options.slt
@@ -147,7 +147,7 @@ select .0 as c1, 0. as c2, 0000. as c3, 00000.00 as c4
 query TTTT
 select arrow_typeof(.0) as c1, arrow_typeof(0.) as c2, arrow_typeof(0000.) as c3, arrow_typeof(00000.00) as c4
 ----
-Decimal128(1, 1) Decimal128(1, 0) Decimal128(1, 0) Decimal128(2, 2)
+Decimal128(2, 1) Decimal128(1, 0) Decimal128(1, 0) Decimal128(3, 2)
 
 query RR
 select 999999999999999999999999999999999999, -999999999999999999999999999999999999
@@ -193,19 +193,31 @@ select arrow_typeof(00009999999999999999999999999999999999.9999),
 Decimal128(38, 4) Decimal128(38, 4) Decimal128(20, 0)
 
 # precision overflow
-statement error DataFusion error: SQL error: ParserError\("Cannot parse 123456789012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
+statement error DataFusion error: SQL error: ParserError\("Cannot parse 123456789.012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
 select 123456789.012345678901234567890123456789
 
-statement error SQL error: ParserError\("Cannot parse 123456789012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
+statement error SQL error: ParserError\("Cannot parse 123456789.012345678901234567890123456789 as i128 when building decimal: precision overflow"\)
 select -123456789.012345678901234567890123456789
 
 # can not fit in i128
-statement error SQL error: ParserError\("Cannot parse 1234567890123456789012345678901234567890 as i128 when building decimal: number too large to fit in target type"\)
+statement error SQL error: ParserError\("Cannot parse 123456789.0123456789012345678901234567890 as i128 when building decimal: precision overflow"\)
 select 123456789.0123456789012345678901234567890
 
-statement error SQL error: ParserError\("Cannot parse 1234567890123456789012345678901234567890 as i128 when building decimal: number too large to fit in target type"\)
+statement error SQL error: ParserError\("Cannot parse 123456789.0123456789012345678901234567890 as i128 when building decimal: precision overflow"\)
 select -123456789.0123456789012345678901234567890
 
+# scientific notation
+query RRRR
+select 1.234e2, 1.234e-2, -1.234e2, -1.234e-2
+----
+123.4 0.01234 -123.4 -0.01234
+
+statement error DataFusion error: SQL error: ParserError\("Cannot parse 1.234e-40 as i128 when building decimal: precision overflow"\)
+select 1.234e-40
+
+statement error DataFusion error: SQL error: ParserError\("Cannot parse 12340000000000000000000000000000000000000 as i128 when building decimal: number too large to fit in target type"\)
+select 1.234e+40
+
 # Restore option to default value
 statement ok
 set datafusion.sql_parser.parse_float_as_decimal = false;