Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support scientific notation in parse_float_as_decimal #8494

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 37 additions & 6 deletions datafusion/sql/src/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
use arrow::datatypes::DECIMAL128_MAX_PRECISION;
use arrow_schema::DataType;
use arrow_schema::{DataType, DECIMAL128_MAX_SCALE};
use datafusion_common::{
not_impl_err, plan_err, DFSchema, DataFusionError, Result, ScalarValue,
};
Expand Down Expand Up @@ -405,11 +405,29 @@ const fn try_decode_hex_char(c: u8) -> Option<u8> {
}

/// Parse Decimal128 from a string
///
/// TODO: support parsing from scientific notation
fn parse_decimal_128(unsigned_number: &str, negative: bool) -> Result<Expr> {
// remove leading zeroes
let trimmed = unsigned_number.trim_start_matches('0');

// check if the number is scientific notation
let parts = trimmed.split(|c| c == 'e' || c == 'E').collect::<Vec<_>>();
Weijun-H marked this conversation as resolved.
Show resolved Hide resolved

let (trimmed, e_scale) = if parts.len() == 1 {
(trimmed, 0)
} else if parts.len() == 2 {
let e_scale = parts[1].parse::<i16>().map_err(|e| {
DataFusionError::from(ParserError(format!(
"Cannot parse {} as i32 when building decimal: {e}",
parts[1]
)))
})?;
(parts[0], e_scale)
} else {
return Err(DataFusionError::from(ParserError(format!(
"Cannot parse {unsigned_number} as i128 when building decimal: invalid format"
))));
};

// parse precision and scale, remove decimal point if exists
let (precision, scale, replaced_str) = if trimmed == "." {
// special cases for numbers such as “0.”, “000.”, and so on.
Expand All @@ -425,17 +443,24 @@ fn parse_decimal_128(unsigned_number: &str, negative: bool) -> Result<Expr> {
(trimmed.len(), 0, Cow::Borrowed(trimmed))
};

let (precision, scale) = if e_scale > 0 {
(precision as i16 + e_scale, scale as i16 - e_scale)
} else {
(precision as i16 - e_scale, scale as i16 - e_scale)
};

let number = replaced_str.parse::<i128>().map_err(|e| {
DataFusionError::from(ParserError(format!(
"Cannot parse {replaced_str} as i128 when building decimal: {e}"
)))
})?;

// check precision overflow
if precision as u8 > DECIMAL128_MAX_PRECISION {
if precision > DECIMAL128_MAX_PRECISION as i16 || scale > DECIMAL128_MAX_SCALE as i16
{
return Err(DataFusionError::from(ParserError(format!(
"Cannot parse {replaced_str} as i128 when building decimal: precision overflow"
))));
"Cannot parse {replaced_str} as i128 when building decimal: precision overflow"
))));
}

Ok(Expr::Literal(ScalarValue::Decimal128(
Expand Down Expand Up @@ -468,4 +493,10 @@ mod tests {
assert_eq!(output, expect);
}
}

#[test]
fn test_parse_128_decimal_number() {
let number = "1.23456e10";
parse_decimal_128(number, false).unwrap();
}
}
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/options.slt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,18 @@ select 123456789.0123456789012345678901234567890
statement error SQL error: ParserError\("Cannot parse 1234567890123456789012345678901234567890 as i128 when building decimal: number too large to fit in target type"\)
select -123456789.0123456789012345678901234567890

# scientific notation
query RRRR
select 1.234e2, 1.234e-2, -1.234e2, -1.234e-2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1.234e-2 is not -123.4 I don't think 🤔

DataFusion CLI v33.0.0
❯ select 1.234e-2;
+------------------+
| Float64(0.01234) |
+------------------+
| 0.01234          |
+------------------+

----
123.4 0.01234 -123.4 -0.01234

statement error DataFusion error: SQL error: ParserError\("Cannot parse 1234 as i128 when building decimal: precision overflow"\)
select 1.234e-38

statement error DataFusion error: SQL error: ParserError\("Cannot parse 1234 as i128 when building decimal: precision overflow"\)
select 1.234e+38

# Restore option to default value
statement ok
set datafusion.sql_parser.parse_float_as_decimal = false;