diff --git a/native/spark-expr/src/string_funcs/string_space.rs b/native/spark-expr/src/string_funcs/string_space.rs index 00b880730a..4ab5362793 100644 --- a/native/spark-expr/src/string_funcs/string_space.rs +++ b/native/spark-expr/src/string_funcs/string_space.rs @@ -119,9 +119,14 @@ fn generic_string_space(length: &Int32Array) -> Arr let null_bit_buffer = length.to_data().nulls().map(|b| b.buffer().clone()); // Gets slice of length array to access it directly for performance. + // Negative length values are set to zero to match Spark behavior let length_data = length.to_data(); - let lengths = length_data.buffers()[0].typed_data::(); - let total = lengths.iter().map(|l| *l as usize).sum::(); + let lengths: Vec<_> = length_data.buffers()[0] + .typed_data::() + .iter() + .map(|l| (*l).max(0) as usize) + .collect(); + let total = lengths.iter().sum::(); let mut values = MutableBuffer::new(total); offsets.push(length_so_far); @@ -130,7 +135,7 @@ fn generic_string_space(length: &Int32Array) -> Arr values.resize(total, blank); (0..array_len).for_each(|i| { - let current_len = lengths[i] as usize; + let current_len = lengths[i]; length_so_far += OffsetSize::from_usize(current_len).unwrap(); offsets.push(length_so_far); @@ -149,3 +154,24 @@ fn generic_string_space(length: &Int32Array) -> Arr }; make_array(data) } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::StringArray; + use datafusion::common::cast::as_string_array; + + #[test] + fn test_negative_length() { + let input = Int32Array::from(vec![Some(-1), Some(-2), None]); + let args = ColumnarValue::Array(Arc::new(input)); + match spark_string_space(&[args]) { + Ok(ColumnarValue::Array(result)) => { + let actual = as_string_array(&result).unwrap(); + let expected = StringArray::from(vec![Some(""), Some(""), None]); + assert_eq!(actual, &expected) + } + _ => unreachable!(), + } + } +} diff --git a/native/spark-expr/src/string_funcs/substring.rs b/native/spark-expr/src/string_funcs/substring.rs index f0ecb4e7e3..5037a6e06f 100644 --- a/native/spark-expr/src/string_funcs/substring.rs +++ b/native/spark-expr/src/string_funcs/substring.rs @@ -61,7 +61,7 @@ impl Display for SubstringExpr { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, - "StringSpace [start: {}, len: {}, child: {}]", + "Substring [start: {}, len: {}, child: {}]", self.start, self.len, self.child ) } diff --git a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala index f9882780c8..662534b3cd 100644 --- a/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometStringExpressionSuite.scala @@ -376,7 +376,7 @@ class CometStringExpressionSuite extends CometTestBase { } test("string_space") { - withParquetTable((0 until 5).map(i => (i, i + 1)), "tbl") { + withParquetTable((0 until 5).map(i => (-i, i + 1)), "tbl") { checkSparkAnswerAndOperator("SELECT space(_1), space(_2) FROM tbl") } }