Skip to content

Commit c33cb85

Browse files
authored
Do not rename struct fields when coercing types in CASE (#14384)
* Fix field name during struct equality coercion * fix bug * Add more tests * Update tests per #14396
1 parent 9d1bfc1 commit c33cb85

File tree

2 files changed

+125
-7
lines changed

2 files changed

+125
-7
lines changed

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -961,23 +961,31 @@ fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
961961
return None;
962962
}
963963

964-
let types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
964+
let coerced_types = std::iter::zip(lhs_fields.iter(), rhs_fields.iter())
965965
.map(|(lhs, rhs)| comparison_coercion(lhs.data_type(), rhs.data_type()))
966966
.collect::<Option<Vec<DataType>>>()?;
967967

968-
let fields = types
968+
// preserve the field name and nullability
969+
let orig_fields = std::iter::zip(lhs_fields.iter(), rhs_fields.iter());
970+
971+
let fields: Vec<FieldRef> = coerced_types
969972
.into_iter()
970-
.enumerate()
971-
.map(|(i, datatype)| {
972-
Arc::new(Field::new(format!("c{i}"), datatype, true))
973-
})
974-
.collect::<Vec<FieldRef>>();
973+
.zip(orig_fields)
974+
.map(|(datatype, (lhs, rhs))| coerce_fields(datatype, lhs, rhs))
975+
.collect();
975976
Some(Struct(fields.into()))
976977
}
977978
_ => None,
978979
}
979980
}
980981

982+
/// returns the result of coercing two fields to a common type
983+
fn coerce_fields(common_type: DataType, lhs: &FieldRef, rhs: &FieldRef) -> FieldRef {
984+
let is_nullable = lhs.is_nullable() || rhs.is_nullable();
985+
let name = lhs.name(); // pick the name from the left field
986+
Arc::new(Field::new(name, common_type, is_nullable))
987+
}
988+
981989
/// Returns the output type of applying mathematics operations such as
982990
/// `+` to arguments of `lhs_type` and `rhs_type`.
983991
fn mathematics_numerical_coercion(

datafusion/sqllogictest/test_files/case.slt

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,3 +308,113 @@ NULL NULL false
308308

309309
statement ok
310310
drop table foo
311+
312+
313+
# Test coercion of inner struct field names
314+
# Reproducer for https://github.com/apache/datafusion/issues/14383
315+
statement ok
316+
create table t as values
317+
(
318+
100, -- column1 int (so the case isn't constant folded)
319+
{ 'foo': 'bar' }, -- column2 has List of Struct w/ Utf8
320+
{ 'foo': arrow_cast('baz', 'Utf8View') }, -- column3 has List of Struct w/ Utf8View
321+
{ 'foo': arrow_cast('blarg', 'Utf8View') } -- column4 has List of Struct w/ Utf8View
322+
);
323+
324+
325+
# Note field name is foo
326+
query ???
327+
SELECT column2, column3, column4 FROM t;
328+
----
329+
{foo: bar} {foo: baz} {foo: blarg}
330+
331+
# Coerce fields, expect the field name to be the name of the first arg to case
332+
# the field should not be named 'c0'
333+
query ?
334+
SELECT
335+
case
336+
when column1 > 0 then column2
337+
when column1 < 0 then column3
338+
else column4
339+
end
340+
FROM t;
341+
----
342+
{foo: bar}
343+
344+
query ?
345+
SELECT
346+
case
347+
when column1 > 0 then column3 -- different arg order shouldn't affect name
348+
when column1 < 0 then column4
349+
else column2
350+
end
351+
FROM t;
352+
----
353+
{foo: baz}
354+
355+
query ?
356+
SELECT
357+
case
358+
when column1 > 0 then column4 -- different arg order shouldn't affect name
359+
when column1 < 0 then column2
360+
else column3
361+
end
362+
FROM t;
363+
----
364+
{foo: blarg}
365+
366+
statement ok
367+
drop table t
368+
369+
370+
# Test coercion of inner struct field names with different orders / missing fields
371+
statement ok
372+
create table t as values
373+
(
374+
100, -- column1 int (so the case isn't constant folded)
375+
{ 'foo': 'a', 'xxx': 'b' }, -- column2: Struct with fields foo, xxx
376+
{ 'xxx': 'c', 'foo': 'd' }, -- column3: Struct with fields xxx, foo
377+
{ 'xxx': 'e' } -- column4: Struct with field xxx (no second field)
378+
);
379+
380+
# Note field names are in different orders
381+
query ???
382+
SELECT column2, column3, column4 FROM t;
383+
----
384+
{foo: a, xxx: b} {xxx: c, foo: d} {xxx: e}
385+
386+
# coerce structs with different field orders,
387+
# (note the *value*s are from column2 but the field name is 'xxx', as the coerced
388+
# type takes the field name from the last argument (column3)
389+
query ?
390+
SELECT
391+
case
392+
when column1 > 0 then column2 -- always true
393+
else column3
394+
end
395+
FROM t;
396+
----
397+
{xxx: a, foo: b}
398+
399+
# coerce structs with different field orders
400+
query ?
401+
SELECT
402+
case
403+
when column1 < 0 then column2 -- always false
404+
else column3
405+
end
406+
FROM t;
407+
----
408+
{xxx: c, foo: d}
409+
410+
# coerce structs with subset of fields
411+
query error Failed to coerce then
412+
SELECT
413+
case
414+
when column1 > 0 then column3
415+
else column4
416+
end
417+
FROM t;
418+
419+
statement ok
420+
drop table t

0 commit comments

Comments
 (0)