Skip to content

Commit d2b93f5

Browse files
watford-epalamb
andauthored
fix: SortPreservingMerge sanity check rejects valid ORDER BY with CASE expression (#18342)
## Which issue does this PR close? - Closes #18327 ## Rationale for this change ORDER BY with a CASE statement didn't always work, raising a sanity check error in SortPreservingMergeExec. The plan showed that the partitions all had the same ordering, but for whatever reason they were not detected as being equal. Using a single partition succeeded always. ## What changes are included in this PR? The changes are non-obvious and I spent a lot of time bisecting/debug printing and landed on a failure in bounds checking with boolean interval arithmetic. Returning UNCERTAIN if either leg of the interval is NULL resolves the upstream issue where CASE statements end up being deemed Unordered. My rust-fu is hobbyist at best, so while this appears to resolve my issue I cannot for-certain exclaim that I've solved it all (Claude 4.5 agrees with my fix, but that's not an indication its any good). I'm also reasonably certain my unit tests are more ham fisted than necessary. ## Are these changes tested? 1. Yes, unit tests have been added. ## Are there any user-facing changes? This does not change any behavior beyond resolving a bug with a valid SQL statement. --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent dcd2f4f commit d2b93f5

File tree

2 files changed

+101
-2
lines changed

2 files changed

+101
-2
lines changed

datafusion/expr-common/src/interval_arithmetic.rs

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,9 @@ impl Interval {
583583
upper: ScalarValue::Boolean(Some(upper)),
584584
})
585585
}
586-
_ => internal_err!("Incompatible data types for logical conjunction"),
586+
587+
// Return UNCERTAIN when intervals don't have concrete boolean bounds
588+
_ => Ok(Self::UNCERTAIN),
587589
}
588590
}
589591

@@ -606,7 +608,9 @@ impl Interval {
606608
upper: ScalarValue::Boolean(Some(upper)),
607609
})
608610
}
609-
_ => internal_err!("Incompatible data types for logical disjunction"),
611+
612+
// Return UNCERTAIN when intervals don't have concrete boolean bounds
613+
_ => Ok(Self::UNCERTAIN),
610614
}
611615
}
612616

@@ -2517,6 +2521,64 @@ mod tests {
25172521
Ok(())
25182522
}
25192523

2524+
#[test]
2525+
fn test_and_or_with_normalized_boolean_intervals() -> Result<()> {
2526+
// Verify that NULL boolean bounds are normalized and don't cause errors
2527+
let from_nulls =
2528+
Interval::try_new(ScalarValue::Boolean(None), ScalarValue::Boolean(None))?;
2529+
2530+
assert!(from_nulls.or(&Interval::CERTAINLY_TRUE).is_ok());
2531+
assert!(from_nulls.and(&Interval::CERTAINLY_FALSE).is_ok());
2532+
2533+
Ok(())
2534+
}
2535+
2536+
#[test]
2537+
fn test_and_null_boolean_intervals() -> Result<()> {
2538+
let null_interval =
2539+
Interval::try_new(ScalarValue::Boolean(None), ScalarValue::Boolean(None))?;
2540+
2541+
let and_result = null_interval.and(&Interval::CERTAINLY_FALSE)?;
2542+
assert_eq!(and_result, Interval::CERTAINLY_FALSE);
2543+
2544+
let and_result = Interval::CERTAINLY_FALSE.and(&null_interval)?;
2545+
assert_eq!(and_result, Interval::CERTAINLY_FALSE);
2546+
2547+
let and_result = null_interval.and(&Interval::CERTAINLY_TRUE)?;
2548+
assert_eq!(and_result, Interval::UNCERTAIN);
2549+
2550+
let and_result = Interval::CERTAINLY_TRUE.and(&null_interval)?;
2551+
assert_eq!(and_result, Interval::UNCERTAIN);
2552+
2553+
let and_result = null_interval.and(&null_interval)?;
2554+
assert_eq!(and_result, Interval::UNCERTAIN);
2555+
2556+
Ok(())
2557+
}
2558+
2559+
#[test]
2560+
fn test_or_null_boolean_intervals() -> Result<()> {
2561+
let null_interval =
2562+
Interval::try_new(ScalarValue::Boolean(None), ScalarValue::Boolean(None))?;
2563+
2564+
let or_result = null_interval.or(&Interval::CERTAINLY_FALSE)?;
2565+
assert_eq!(or_result, Interval::UNCERTAIN);
2566+
2567+
let or_result = Interval::CERTAINLY_FALSE.or(&null_interval)?;
2568+
assert_eq!(or_result, Interval::UNCERTAIN);
2569+
2570+
let or_result = null_interval.or(&Interval::CERTAINLY_TRUE)?;
2571+
assert_eq!(or_result, Interval::CERTAINLY_TRUE);
2572+
2573+
let or_result = Interval::CERTAINLY_TRUE.or(&null_interval)?;
2574+
assert_eq!(or_result, Interval::CERTAINLY_TRUE);
2575+
2576+
let or_result = null_interval.or(&null_interval)?;
2577+
assert_eq!(or_result, Interval::UNCERTAIN);
2578+
2579+
Ok(())
2580+
}
2581+
25202582
#[test]
25212583
fn intersect_test() -> Result<()> {
25222584
let possible_cases = vec![

datafusion/sqllogictest/test_files/union.slt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,3 +953,40 @@ drop table u1;
953953

954954
statement count 0
955955
drop table u2;
956+
957+
# repro for https://github.com/apache/datafusion/issues/18327
958+
# should not error
959+
query TITT
960+
WITH typ(oid, typnamespace, typname, typtype) AS (
961+
SELECT * FROM (VALUES (1, 10, 't1', 'b'))
962+
UNION ALL SELECT * FROM (VALUES (2, NULL, 't2', 'b'))
963+
UNION ALL SELECT * FROM (VALUES (3, 12, 't3', NULL))
964+
)
965+
, ns(oid, nspname) AS (VALUES (1, 'ns1'), (2, 'ns2'))
966+
SELECT ns.nspname, typ.oid, typ.typname, typ.typtype
967+
FROM typ JOIN ns ON (ns.oid = typ.typnamespace)
968+
WHERE typ.typtype IN ('b','r','m','e','d')
969+
ORDER BY CASE WHEN typ.typtype IN ('b','e','p') THEN 0
970+
WHEN typ.typtype = 'r' THEN 1
971+
END
972+
----
973+
974+
# Add another row with a non-NULL value `m` which is retained by the
975+
# filter but not matching any WHEN branch m?
976+
query TITT
977+
WITH typ(oid, typnamespace, typname, typtype) AS (
978+
SELECT * FROM (VALUES (1, 10, 't1', 'b'))
979+
UNION ALL SELECT * FROM (VALUES (2, NULL, 't2', 'b'))
980+
UNION ALL SELECT * FROM (VALUES (3, 12, 't3', NULL))
981+
UNION ALL SELECT * FROM (VALUES (4, 40, 't3', 'm'))
982+
), ns(oid, nspname) AS (
983+
VALUES (1, 'ns1'), (2, 'ns2'), (40, 'ns3')
984+
)
985+
SELECT ns.nspname, typ.oid, typ.typname, typ.typtype
986+
FROM typ JOIN ns ON (ns.oid = typ.typnamespace)
987+
WHERE typ.typtype IN ('b','r','m','e','d')
988+
ORDER BY CASE WHEN typ.typtype IN ('b','e','p') THEN 0
989+
WHEN typ.typtype = 'r' THEN 1
990+
END
991+
----
992+
ns3 4 t3 m

0 commit comments

Comments
 (0)