From 7932cb9373192ce2754b39c1f82f22c8a56b7266 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 1 Sep 2021 04:15:25 +0800 Subject: [PATCH] Change compound column field name rules (#952) * change physical name semantic * replace expect output in context.rs * replace expect output in sql & dataframe_impl * add spec entry * replace expect output in lib doc & planner --- datafusion/src/execution/context.rs | 418 +++++++++--------- datafusion/src/execution/dataframe_impl.rs | 18 +- datafusion/src/lib.rs | 20 +- datafusion/src/physical_plan/planner.rs | 75 ++-- datafusion/tests/sql.rs | 252 +++++------ .../output-field-name-semantic.md | 24 +- 6 files changed, 412 insertions(+), 395 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 2e6a7a4f7012..da6de0411ad9 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -1493,15 +1493,15 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+----+----+---------+-----------+---------+---------+---------+", - "| c1 | c2 | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |", - "+----+----+---------+-----------+---------+---------+---------+", - "| 0 | 1 | 220 | 40 | 10 | 1 | 5.5 |", - "| 0 | 2 | 220 | 40 | 10 | 1 | 5.5 |", - "| 0 | 3 | 220 | 40 | 10 | 1 | 5.5 |", - "| 0 | 4 | 220 | 40 | 10 | 1 | 5.5 |", - "| 0 | 5 | 220 | 40 | 10 | 1 | 5.5 |", - "+----+----+---------+-----------+---------+---------+---------+", + "+----+----+--------------+----------------+--------------+--------------+--------------+", + "| c1 | c2 | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |", + "+----+----+--------------+----------------+--------------+--------------+--------------+", + "| 0 | 1 | 220 | 40 | 10 | 1 | 5.5 |", + "| 0 | 2 | 220 | 40 | 10 | 1 | 5.5 |", + "| 0 | 3 | 220 | 40 | 10 | 1 | 5.5 |", + "| 0 | 4 | 220 | 40 | 10 | 1 | 5.5 |", + "| 0 | 5 | 220 | 40 | 10 | 1 | 5.5 |", + "+----+----+--------------+----------------+--------------+--------------+--------------+", ]; // window function shall respect ordering @@ -1536,15 +1536,15 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+", - "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(c2) | LAST_VALUE(c2) | NTH_VALUE(c2,Int64(2)) | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |", - "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+", - "| 0 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 |", - "| 0 | 2 | 2 | 1 | 2 | 2 | 3 | 2 | 2 | 1 | 1.5 |", - "| 0 | 3 | 3 | 1 | 3 | 2 | 6 | 3 | 3 | 1 | 2 |", - "| 0 | 4 | 4 | 1 | 4 | 2 | 10 | 4 | 4 | 1 | 2.5 |", - "| 0 | 5 | 5 | 1 | 5 | 2 | 15 | 5 | 5 | 1 | 3 |", - "+----+----+--------------+-----------------+----------------+------------------------+---------+-----------+---------+---------+---------+", + "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+", + "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(test.c2) | LAST_VALUE(test.c2) | NTH_VALUE(test.c2,Int64(2)) | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |", + "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+", + "| 0 | 1 | 1 | 1 | 1 | | 1 | 1 | 1 | 1 | 1 |", + "| 0 | 2 | 2 | 1 | 2 | 2 | 3 | 2 | 2 | 1 | 1.5 |", + "| 0 | 3 | 3 | 1 | 3 | 2 | 6 | 3 | 3 | 1 | 2 |", + "| 0 | 4 | 4 | 1 | 4 | 2 | 10 | 4 | 4 | 1 | 2.5 |", + "| 0 | 5 | 5 | 1 | 5 | 2 | 15 | 5 | 5 | 1 | 3 |", + "+----+----+--------------+----------------------+---------------------+-----------------------------+--------------+----------------+--------------+--------------+--------------+", ]; // window function shall respect ordering @@ -1571,15 +1571,15 @@ mod tests { .await?; let expected = vec![ - "+----+----+---------+-----------+---------+---------+---------+", - "| c1 | c2 | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |", - "+----+----+---------+-----------+---------+---------+---------+", - "| 0 | 1 | 4 | 4 | 1 | 1 | 1 |", - "| 0 | 2 | 8 | 4 | 2 | 2 | 2 |", - "| 0 | 3 | 12 | 4 | 3 | 3 | 3 |", - "| 0 | 4 | 16 | 4 | 4 | 4 | 4 |", - "| 0 | 5 | 20 | 4 | 5 | 5 | 5 |", - "+----+----+---------+-----------+---------+---------+---------+", + "+----+----+--------------+----------------+--------------+--------------+--------------+", + "| c1 | c2 | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |", + "+----+----+--------------+----------------+--------------+--------------+--------------+", + "| 0 | 1 | 4 | 4 | 1 | 1 | 1 |", + "| 0 | 2 | 8 | 4 | 2 | 2 | 2 |", + "| 0 | 3 | 12 | 4 | 3 | 3 | 3 |", + "| 0 | 4 | 16 | 4 | 4 | 4 | 4 |", + "| 0 | 5 | 20 | 4 | 5 | 5 | 5 |", + "+----+----+--------------+----------------+--------------+--------------+--------------+", ]; // window function shall respect ordering @@ -1610,15 +1610,15 @@ mod tests { .await?; let expected = vec![ - "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+", - "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(c2 Plus c1) | LAST_VALUE(c2 Plus c1) | NTH_VALUE(c2 Plus c1,Int64(1)) | SUM(c2) | COUNT(c2) | MAX(c2) | MIN(c2) | AVG(c2) |", - "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+", - "| 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", - "| 0 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 |", - "| 0 | 3 | 1 | 3 | 3 | 3 | 3 | 1 | 3 | 3 | 3 |", - "| 0 | 4 | 1 | 4 | 4 | 4 | 4 | 1 | 4 | 4 | 4 |", - "| 0 | 5 | 1 | 5 | 5 | 5 | 5 | 1 | 5 | 5 | 5 |", - "+----+----+--------------+-------------------------+------------------------+--------------------------------+---------+-----------+---------+---------+---------+", + "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+", + "| c1 | c2 | ROW_NUMBER() | FIRST_VALUE(test.c2 Plus test.c1) | LAST_VALUE(test.c2 Plus test.c1) | NTH_VALUE(test.c2 Plus test.c1,Int64(1)) | SUM(test.c2) | COUNT(test.c2) | MAX(test.c2) | MIN(test.c2) | AVG(test.c2) |", + "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+", + "| 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", + "| 0 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 |", + "| 0 | 3 | 1 | 3 | 3 | 3 | 3 | 1 | 3 | 3 | 3 |", + "| 0 | 4 | 1 | 4 | 4 | 4 | 4 | 1 | 4 | 4 | 4 |", + "| 0 | 5 | 1 | 5 | 5 | 5 | 5 | 1 | 5 | 5 | 5 |", + "+----+----+--------------+-----------------------------------+----------------------------------+------------------------------------------+--------------+----------------+--------------+--------------+--------------+", ]; // window function shall respect ordering @@ -1632,11 +1632,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+", - "| SUM(c1) | SUM(c2) |", - "+---------+---------+", - "| 60 | 220 |", - "+---------+---------+", + "+--------------+--------------+", + "| SUM(test.c1) | SUM(test.c2) |", + "+--------------+--------------+", + "| 60 | 220 |", + "+--------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1653,11 +1653,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+", - "| SUM(c1) | SUM(c2) |", - "+---------+---------+", - "| | |", - "+---------+---------+", + "+--------------+--------------+", + "| SUM(test.c1) | SUM(test.c2) |", + "+--------------+--------------+", + "| | |", + "+--------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1670,11 +1670,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+", - "| AVG(c1) | AVG(c2) |", - "+---------+---------+", - "| 1.5 | 5.5 |", - "+---------+---------+", + "+--------------+--------------+", + "| AVG(test.c1) | AVG(test.c2) |", + "+--------------+--------------+", + "| 1.5 | 5.5 |", + "+--------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1687,11 +1687,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+", - "| MAX(c1) | MAX(c2) |", - "+---------+---------+", - "| 3 | 10 |", - "+---------+---------+", + "+--------------+--------------+", + "| MAX(test.c1) | MAX(test.c2) |", + "+--------------+--------------+", + "| 3 | 10 |", + "+--------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1704,11 +1704,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+", - "| MIN(c1) | MIN(c2) |", - "+---------+---------+", - "| 0 | 1 |", - "+---------+---------+", + "+--------------+--------------+", + "| MIN(test.c1) | MIN(test.c2) |", + "+--------------+--------------+", + "| 0 | 1 |", + "+--------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1720,14 +1720,14 @@ mod tests { let results = execute("SELECT c1, SUM(c2) FROM test GROUP BY c1", 4).await?; let expected = vec![ - "+----+---------+", - "| c1 | SUM(c2) |", - "+----+---------+", - "| 0 | 55 |", - "| 1 | 55 |", - "| 2 | 55 |", - "| 3 | 55 |", - "+----+---------+", + "+----+--------------+", + "| c1 | SUM(test.c2) |", + "+----+--------------+", + "| 0 | 55 |", + "| 1 | 55 |", + "| 2 | 55 |", + "| 3 | 55 |", + "+----+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1739,14 +1739,14 @@ mod tests { let results = execute("SELECT c1, AVG(c2) FROM test GROUP BY c1", 4).await?; let expected = vec![ - "+----+---------+", - "| c1 | AVG(c2) |", - "+----+---------+", - "| 0 | 5.5 |", - "| 1 | 5.5 |", - "| 2 | 5.5 |", - "| 3 | 5.5 |", - "+----+---------+", + "+----+--------------+", + "| c1 | AVG(test.c2) |", + "+----+--------------+", + "| 0 | 5.5 |", + "| 1 | 5.5 |", + "| 2 | 5.5 |", + "| 3 | 5.5 |", + "+----+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1790,14 +1790,14 @@ mod tests { let results = execute("SELECT c1, MAX(c2) FROM test GROUP BY c1", 4).await?; let expected = vec![ - "+----+---------+", - "| c1 | MAX(c2) |", - "+----+---------+", - "| 0 | 10 |", - "| 1 | 10 |", - "| 2 | 10 |", - "| 3 | 10 |", - "+----+---------+", + "+----+--------------+", + "| c1 | MAX(test.c2) |", + "+----+--------------+", + "| 0 | 10 |", + "| 1 | 10 |", + "| 2 | 10 |", + "| 3 | 10 |", + "+----+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1809,14 +1809,14 @@ mod tests { let results = execute("SELECT c1, MIN(c2) FROM test GROUP BY c1", 4).await?; let expected = vec![ - "+----+---------+", - "| c1 | MIN(c2) |", - "+----+---------+", - "| 0 | 1 |", - "| 1 | 1 |", - "| 2 | 1 |", - "| 3 | 1 |", - "+----+---------+", + "+----+--------------+", + "| c1 | MIN(test.c2) |", + "+----+--------------+", + "| 0 | 1 |", + "| 1 | 1 |", + "| 2 | 1 |", + "| 3 | 1 |", + "+----+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1857,11 +1857,11 @@ mod tests { .unwrap(); let expected = vec![ - "+--------------+---------------+---------------+-------------+", - "| COUNT(nanos) | COUNT(micros) | COUNT(millis) | COUNT(secs) |", - "+--------------+---------------+---------------+-------------+", - "| 3 | 3 | 3 | 3 |", - "+--------------+---------------+---------------+-------------+", + "+----------------+-----------------+-----------------+---------------+", + "| COUNT(t.nanos) | COUNT(t.micros) | COUNT(t.millis) | COUNT(t.secs) |", + "+----------------+-----------------+-----------------+---------------+", + "| 3 | 3 | 3 | 3 |", + "+----------------+-----------------+-----------------+---------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -1884,7 +1884,7 @@ mod tests { let expected = vec![ "+----------------------------+----------------------------+-------------------------+---------------------+", - "| MIN(nanos) | MIN(micros) | MIN(millis) | MIN(secs) |", + "| MIN(t.nanos) | MIN(t.micros) | MIN(t.millis) | MIN(t.secs) |", "+----------------------------+----------------------------+-------------------------+---------------------+", "| 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123450 | 2011-12-13 11:13:10.123 | 2011-12-13 11:13:10 |", "+----------------------------+----------------------------+-------------------------+---------------------+", @@ -1910,7 +1910,7 @@ mod tests { let expected = vec![ "+-------------------------+-------------------------+-------------------------+---------------------+", - "| MAX(nanos) | MAX(micros) | MAX(millis) | MAX(secs) |", + "| MAX(t.nanos) | MAX(t.micros) | MAX(t.millis) | MAX(t.secs) |", "+-------------------------+-------------------------+-------------------------+---------------------+", "| 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10.432 | 2021-01-01 05:11:10 |", "+-------------------------+-------------------------+-------------------------+---------------------+", @@ -1961,11 +1961,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+-----------+-----------+", - "| COUNT(c1) | COUNT(c2) |", - "+-----------+-----------+", - "| 10 | 10 |", - "+-----------+-----------+", + "+----------------+----------------+", + "| COUNT(test.c1) | COUNT(test.c2) |", + "+----------------+----------------+", + "| 10 | 10 |", + "+----------------+----------------+", ]; assert_batches_sorted_eq!(expected, &results); Ok(()) @@ -1977,11 +1977,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+-----------+-----------+", - "| COUNT(c1) | COUNT(c2) |", - "+-----------+-----------+", - "| 40 | 40 |", - "+-----------+-----------+", + "+----------------+----------------+", + "| COUNT(test.c1) | COUNT(test.c2) |", + "+----------------+----------------+", + "| 40 | 40 |", + "+----------------+----------------+", ]; assert_batches_sorted_eq!(expected, &results); Ok(()) @@ -1992,14 +1992,14 @@ mod tests { let results = execute("SELECT c1, COUNT(c2) FROM test GROUP BY c1", 4).await?; let expected = vec![ - "+----+-----------+", - "| c1 | COUNT(c2) |", - "+----+-----------+", - "| 0 | 10 |", - "| 1 | 10 |", - "| 2 | 10 |", - "| 3 | 10 |", - "+----+-----------+", + "+----+----------------+", + "| c1 | COUNT(test.c2) |", + "+----+----------------+", + "| 0 | 10 |", + "| 1 | 10 |", + "| 2 | 10 |", + "| 3 | 10 |", + "+----+----------------+", ]; assert_batches_sorted_eq!(expected, &results); Ok(()) @@ -2043,12 +2043,12 @@ mod tests { ).await?; let expected = vec![ - "+---------------------+---------+", - "| week | SUM(c2) |", - "+---------------------+---------+", - "| 2020-12-07 00:00:00 | 24 |", - "| 2020-12-14 00:00:00 | 156 |", - "+---------------------+---------+", + "+---------------------+--------------+", + "| week | SUM(test.c2) |", + "+---------------------+--------------+", + "| 2020-12-07 00:00:00 | 24 |", + "| 2020-12-14 00:00:00 | 156 |", + "+---------------------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -2094,13 +2094,13 @@ mod tests { .expect("ran plan correctly"); let expected = vec![ - "+-----+------------+", - "| str | COUNT(val) |", - "+-----+------------+", - "| A | 4 |", - "| B | 1 |", - "| C | 1 |", - "+-----+------------+", + "+-----+--------------+", + "| str | COUNT(t.val) |", + "+-----+--------------+", + "| A | 4 |", + "| B | 1 |", + "| C | 1 |", + "+-----+--------------+", ]; assert_batches_sorted_eq!(expected, &results); } @@ -2145,13 +2145,13 @@ mod tests { .expect("ran plan correctly"); let expected = vec![ - "+------+------------+", - "| dict | COUNT(val) |", - "+------+------------+", - "| A | 4 |", - "| B | 1 |", - "| C | 1 |", - "+------+------------+", + "+------+--------------+", + "| dict | COUNT(t.val) |", + "+------+--------------+", + "| A | 4 |", + "| B | 1 |", + "| C | 1 |", + "+------+--------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -2162,13 +2162,13 @@ mod tests { .expect("ran plan correctly"); let expected = vec![ - "+-----+-------------+", - "| val | COUNT(dict) |", - "+-----+-------------+", - "| 1 | 3 |", - "| 2 | 2 |", - "| 4 | 1 |", - "+-----+-------------+", + "+-----+---------------+", + "| val | COUNT(t.dict) |", + "+-----+---------------+", + "| 1 | 3 |", + "| 2 | 2 |", + "| 4 | 1 |", + "+-----+---------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -2181,13 +2181,13 @@ mod tests { .expect("ran plan correctly"); let expected = vec![ - "+-----+----------------------+", - "| val | COUNT(DISTINCT dict) |", - "+-----+----------------------+", - "| 1 | 2 |", - "| 2 | 2 |", - "| 4 | 1 |", - "+-----+----------------------+", + "+-----+------------------------+", + "| val | COUNT(DISTINCT t.dict) |", + "+-----+------------------------+", + "| 1 | 2 |", + "| 2 | 2 |", + "| 4 | 1 |", + "+-----+------------------------+", ]; assert_batches_sorted_eq!(expected, &results); } @@ -2286,13 +2286,13 @@ mod tests { let results = run_count_distinct_integers_aggregated_scenario(partitions).await?; let expected = vec![ - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", - "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |", - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", - "| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", - "| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", - "| c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", + "| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", + "| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", + "| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", + "| c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -2312,13 +2312,13 @@ mod tests { let results = run_count_distinct_integers_aggregated_scenario(partitions).await?; let expected = vec![ - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", - "| c_group | COUNT(c_uint64) | COUNT(DISTINCT c_int8) | COUNT(DISTINCT c_int16) | COUNT(DISTINCT c_int32) | COUNT(DISTINCT c_int64) | COUNT(DISTINCT c_uint8) | COUNT(DISTINCT c_uint16) | COUNT(DISTINCT c_uint32) | COUNT(DISTINCT c_uint64) |", - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", - "| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |", - "| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |", - "| c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", - "+---------+-----------------+------------------------+-------------------------+-------------------------+-------------------------+-------------------------+--------------------------+--------------------------+--------------------------+", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", + "| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", + "| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |", + "| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |", + "| c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", + "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", ]; assert_batches_sorted_eq!(expected, &results); @@ -2433,11 +2433,11 @@ mod tests { .unwrap(); let expected = vec![ - "+---------+", - "| sqrt(i) |", - "+---------+", - "| 1 |", - "+---------+", + "+-----------+", + "| sqrt(t.i) |", + "+-----------+", + "| 1 |", + "+-----------+", ]; let results = plan_and_collect(&mut ctx, "SELECT sqrt(i) FROM t") @@ -2521,11 +2521,11 @@ mod tests { let provider = MemTable::try_new(schema, vec![vec![batch]]).unwrap(); ctx.register_table("t", Arc::new(provider)).unwrap(); let expected = vec![ - "+---------+", - "| sqrt(v) |", - "+---------+", - "| 1 |", - "+---------+", + "+-----------+", + "| sqrt(t.v) |", + "+-----------+", + "| 1 |", + "+-----------+", ]; let results = plan_and_collect(&mut ctx, "SELECT sqrt(v) FROM t") .await @@ -2564,11 +2564,11 @@ mod tests { let result = plan_and_collect(&mut ctx, "SELECT \"MY_FUNC\"(i) FROM t").await?; let expected = vec![ - "+------------+", - "| MY_FUNC(i) |", - "+------------+", - "| 1 |", - "+------------+", + "+--------------+", + "| MY_FUNC(t.i) |", + "+--------------+", + "| 1 |", + "+--------------+", ]; assert_batches_eq!(expected, &result); @@ -2582,11 +2582,11 @@ mod tests { .unwrap(); let expected = vec![ - "+--------+", - "| MAX(i) |", - "+--------+", - "| 1 |", - "+--------+", + "+----------+", + "| MAX(t.i) |", + "+----------+", + "| 1 |", + "+----------+", ]; let results = plan_and_collect(&mut ctx, "SELECT max(i) FROM t") @@ -2645,11 +2645,11 @@ mod tests { let result = plan_and_collect(&mut ctx, "SELECT \"MY_AVG\"(i) FROM t").await?; let expected = vec![ - "+-----------+", - "| MY_AVG(i) |", - "+-----------+", - "| 1 |", - "+-----------+", + "+-------------+", + "| MY_AVG(t.i) |", + "+-------------+", + "| 1 |", + "+-------------+", ]; assert_batches_eq!(expected, &result); @@ -2745,11 +2745,11 @@ mod tests { assert_eq!(results.len(), 1); let expected = vec![ - "+---------+---------+-----------------+", - "| SUM(c1) | SUM(c2) | COUNT(UInt8(1)) |", - "+---------+---------+-----------------+", - "| 10 | 110 | 20 |", - "+---------+---------+-----------------+", + "+--------------+--------------+-----------------+", + "| SUM(test.c1) | SUM(test.c2) | COUNT(UInt8(1)) |", + "+--------------+--------------+-----------------+", + "| 10 | 110 | 20 |", + "+--------------+--------------+-----------------+", ]; assert_batches_eq!(expected, &results); @@ -2864,14 +2864,14 @@ mod tests { let result = collect(plan).await?; let expected = vec![ - "+-----+-----+-------------+", - "| a | b | my_add(a,b) |", - "+-----+-----+-------------+", - "| 1 | 2 | 3 |", - "| 10 | 12 | 22 |", - "| 10 | 12 | 22 |", - "| 100 | 120 | 220 |", - "+-----+-----+-------------+", + "+-----+-----+-----------------+", + "| a | b | my_add(t.a,t.b) |", + "+-----+-----+-----------------+", + "| 1 | 2 | 3 |", + "| 10 | 12 | 22 |", + "| 10 | 12 | 22 |", + "| 100 | 120 | 220 |", + "+-----+-----+-----------------+", ]; assert_batches_eq!(expected, &result); @@ -2974,11 +2974,11 @@ mod tests { let result = plan_and_collect(&mut ctx, "SELECT MY_AVG(a) FROM t").await?; let expected = vec![ - "+-----------+", - "| my_avg(a) |", - "+-----------+", - "| 3 |", - "+-----------+", + "+-------------+", + "| my_avg(t.a) |", + "+-------------+", + "| 3 |", + "+-------------+", ]; assert_batches_eq!(expected, &result); diff --git a/datafusion/src/execution/dataframe_impl.rs b/datafusion/src/execution/dataframe_impl.rs index 5e1a4f457b73..724a3f8493c5 100644 --- a/datafusion/src/execution/dataframe_impl.rs +++ b/datafusion/src/execution/dataframe_impl.rs @@ -277,15 +277,15 @@ mod tests { assert_batches_sorted_eq!( vec![ - "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+", - "| c1 | MIN(c12) | MAX(c12) | AVG(c12) | SUM(c12) | COUNT(c12) | COUNT(DISTINCT c12) |", - "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+", - "| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |", - "| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |", - "| c | 0.0494924465469434 | 0.991517828651004 | 0.6600456536439784 | 13.860958726523545 | 21 | 21 |", - "| d | 0.061029375346466685 | 0.9748360509016578 | 0.48855379387549824 | 8.793968289758968 | 18 | 18 |", - "| e | 0.01479305307777301 | 0.9965400387585364 | 0.48600669271341534 | 10.206140546981722 | 21 | 21 |", - "+----+----------------------+--------------------+---------------------+--------------------+------------+---------------------+", + "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+", + "| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | SUM(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |", + "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+", + "| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |", + "| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |", + "| c | 0.0494924465469434 | 0.991517828651004 | 0.6600456536439784 | 13.860958726523545 | 21 | 21 |", + "| d | 0.061029375346466685 | 0.9748360509016578 | 0.48855379387549824 | 8.793968289758968 | 18 | 18 |", + "| e | 0.01479305307777301 | 0.9965400387585364 | 0.48600669271341534 | 10.206140546981722 | 21 | 21 |", + "+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+", ], &df ); diff --git a/datafusion/src/lib.rs b/datafusion/src/lib.rs index 93ff7e7c683b..eac9b5f5a78a 100644 --- a/datafusion/src/lib.rs +++ b/datafusion/src/lib.rs @@ -60,11 +60,11 @@ //! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?; //! //! let expected = vec![ -//! "+---+--------+", -//! "| a | MIN(b) |", -//! "+---+--------+", -//! "| 1 | 2 |", -//! "+---+--------+" +//! "+---+--------------------------+", +//! "| a | MIN(tests/example.csv.b) |", +//! "+---+--------------------------+", +//! "| 1 | 2 |", +//! "+---+--------------------------+" //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::>(), expected); @@ -95,11 +95,11 @@ //! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?; //! //! let expected = vec![ -//! "+---+--------+", -//! "| a | MIN(b) |", -//! "+---+--------+", -//! "| 1 | 2 |", -//! "+---+--------+" +//! "+---+----------------+", +//! "| a | MIN(example.b) |", +//! "+---+----------------+", +//! "| 1 | 2 |", +//! "+---+----------------+" //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::>(), expected); diff --git a/datafusion/src/physical_plan/planner.rs b/datafusion/src/physical_plan/planner.rs index 1cc3625d6b4f..d4991746f9a0 100644 --- a/datafusion/src/physical_plan/planner.rs +++ b/datafusion/src/physical_plan/planner.rs @@ -62,11 +62,10 @@ fn create_function_physical_name( fun: &str, distinct: bool, args: &[Expr], - input_schema: &DFSchema, ) -> Result { let names: Vec = args .iter() - .map(|e| physical_name(e, input_schema)) + .map(|e| create_physical_name(e, false)) .collect::>()?; let distinct_str = match distinct { @@ -76,15 +75,25 @@ fn create_function_physical_name( Ok(format!("{}({}{})", fun, distinct_str, names.join(","))) } -fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result { +fn physical_name(e: &Expr) -> Result { + create_physical_name(e, true) +} + +fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { match e { - Expr::Column(c) => Ok(c.name.clone()), + Expr::Column(c) => { + if is_first_expr { + Ok(c.name.clone()) + } else { + Ok(c.flat_name()) + } + } Expr::Alias(_, name) => Ok(name.clone()), Expr::ScalarVariable(variable_names) => Ok(variable_names.join(".")), Expr::Literal(value) => Ok(format!("{:?}", value)), Expr::BinaryExpr { left, op, right } => { - let left = physical_name(left, input_schema)?; - let right = physical_name(right, input_schema)?; + let left = create_physical_name(left, false)?; + let right = create_physical_name(right, false)?; Ok(format!("{} {:?} {}", left, op, right)) } Expr::Case { @@ -106,50 +115,48 @@ fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result { Ok(name) } Expr::Cast { expr, data_type } => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("CAST({} AS {:?})", expr, data_type)) } Expr::TryCast { expr, data_type } => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("TRY_CAST({} AS {:?})", expr, data_type)) } Expr::Not(expr) => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("NOT {}", expr)) } Expr::Negative(expr) => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("(- {})", expr)) } Expr::IsNull(expr) => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NULL", expr)) } Expr::IsNotNull(expr) => { - let expr = physical_name(expr, input_schema)?; + let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NOT NULL", expr)) } Expr::ScalarFunction { fun, args, .. } => { - create_function_physical_name(&fun.to_string(), false, args, input_schema) + create_function_physical_name(&fun.to_string(), false, args) } Expr::ScalarUDF { fun, args, .. } => { - create_function_physical_name(&fun.name, false, args, input_schema) + create_function_physical_name(&fun.name, false, args) } Expr::WindowFunction { fun, args, .. } => { - create_function_physical_name(&fun.to_string(), false, args, input_schema) + create_function_physical_name(&fun.to_string(), false, args) } Expr::AggregateFunction { fun, distinct, args, .. - } => { - create_function_physical_name(&fun.to_string(), *distinct, args, input_schema) - } + } => create_function_physical_name(&fun.to_string(), *distinct, args), Expr::AggregateUDF { fun, args } => { let mut names = Vec::with_capacity(args.len()); for e in args { - names.push(physical_name(e, input_schema)?); + names.push(create_physical_name(e, false)?); } Ok(format!("{}({})", fun.name, names.join(","))) } @@ -158,8 +165,8 @@ fn physical_name(e: &Expr, input_schema: &DFSchema) -> Result { list, negated, } => { - let expr = physical_name(expr, input_schema)?; - let list = list.iter().map(|expr| physical_name(expr, input_schema)); + let expr = create_physical_name(expr, false)?; + let list = list.iter().map(|expr| create_physical_name(expr, false)); if *negated { Ok(format!("{} NOT IN ({:?})", expr, list)) } else { @@ -444,7 +451,7 @@ impl DefaultPhysicalPlanner { &physical_input_schema, ctx_state, ), - physical_name(e, logical_input_schema), + physical_name(e), )) }) .collect::>>()?; @@ -545,10 +552,10 @@ impl DefaultPhysicalPlanner { } // logical column is not a derived column, safe to pass along to // physical_name - Err(_) => physical_name(e, input_schema), + Err(_) => physical_name(e), } } else { - physical_name(e, input_schema) + physical_name(e) }; tuple_err(( @@ -1192,7 +1199,7 @@ impl DefaultPhysicalPlanner { // unpack aliased logical expressions, e.g. "sum(col) over () as total" let (name, e) = match e { Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()), - _ => (physical_name(e, logical_input_schema)?, e), + _ => (physical_name(e)?, e), }; self.create_window_expr_with_name( e, @@ -1271,7 +1278,7 @@ impl DefaultPhysicalPlanner { // unpack aliased logical expressions, e.g. "sum(col) as total" let (name, e) = match e { Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()), - _ => (physical_name(e, logical_input_schema)?, e), + _ => (physical_name(e)?, e), }; self.create_aggregate_expr_with_name( @@ -1629,16 +1636,24 @@ mod tests { let path = format!("{}/csv/aggregate_test_100.csv", testdata); let options = CsvReadOptions::new().schema_infer_max_records(100); - let logical_plan = LogicalPlanBuilder::scan_csv(path, options, None)? - .aggregate(vec![col("c1")], vec![sum(col("c2"))])? - .build()?; + let logical_plan = LogicalPlanBuilder::scan_csv_with_name( + path, + options, + None, + "aggregate_test_100", + )? + .aggregate(vec![col("c1")], vec![sum(col("c2"))])? + .build()?; let execution_plan = plan(&logical_plan)?; let final_hash_agg = execution_plan .as_any() .downcast_ref::() .expect("hash aggregate"); - assert_eq!("SUM(c2)", final_hash_agg.schema().field(1).name()); + assert_eq!( + "SUM(aggregate_test_100.c2)", + final_hash_agg.schema().field(1).name() + ); // we need access to the input to the partial aggregate so that other projects can // implement serde assert_eq!("c2", final_hash_agg.input_schema().field(1).name()); diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 03c5cbb83d6d..40cd38f7a644 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -166,18 +166,18 @@ async fn parquet_query() { let sql = "SELECT id, CAST(string_col AS varchar) FROM alltypes_plain"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+--------------------------+", - "| id | CAST(string_col AS Utf8) |", - "+----+--------------------------+", - "| 4 | 0 |", - "| 5 | 1 |", - "| 6 | 0 |", - "| 7 | 1 |", - "| 2 | 0 |", - "| 3 | 1 |", - "| 0 | 0 |", - "| 1 | 1 |", - "+----+--------------------------+", + "+----+-----------------------------------------+", + "| id | CAST(alltypes_plain.string_col AS Utf8) |", + "+----+-----------------------------------------+", + "| 4 | 0 |", + "| 5 | 1 |", + "| 6 | 0 |", + "| 7 | 1 |", + "| 2 | 0 |", + "| 3 | 1 |", + "| 0 | 0 |", + "| 1 | 1 |", + "+----+-----------------------------------------+", ]; assert_batches_eq!(expected, &actual); @@ -338,11 +338,11 @@ async fn csv_count_star() -> Result<()> { let sql = "SELECT COUNT(*), COUNT(1) AS c, COUNT(c1) FROM aggregate_test_100"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+-----------------+-----+-----------+", - "| COUNT(UInt8(1)) | c | COUNT(c1) |", - "+-----------------+-----+-----------+", - "| 100 | 100 | 100 |", - "+-----------------+-----+-----------+", + "+-----------------+-----+------------------------------+", + "| COUNT(UInt8(1)) | c | COUNT(aggregate_test_100.c1) |", + "+-----------------+-----+------------------------------+", + "| 100 | 100 | 100 |", + "+-----------------+-----+------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -442,15 +442,15 @@ async fn csv_query_group_by_int_min_max() -> Result<()> { let sql = "SELECT c2, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c2"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+----------------------+--------------------+", - "| c2 | MIN(c12) | MAX(c12) |", - "+----+----------------------+--------------------+", - "| 1 | 0.05636955101974106 | 0.9965400387585364 |", - "| 2 | 0.16301110515739792 | 0.991517828651004 |", - "| 3 | 0.047343434291126085 | 0.9293883502480845 |", - "| 4 | 0.02182578039211991 | 0.9237877978193884 |", - "| 5 | 0.01479305307777301 | 0.9723580396501548 |", - "+----+----------------------+--------------------+", + "+----+-----------------------------+-----------------------------+", + "| c2 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) |", + "+----+-----------------------------+-----------------------------+", + "| 1 | 0.05636955101974106 | 0.9965400387585364 |", + "| 2 | 0.16301110515739792 | 0.991517828651004 |", + "| 3 | 0.047343434291126085 | 0.9293883502480845 |", + "| 4 | 0.02182578039211991 | 0.9237877978193884 |", + "| 5 | 0.01479305307777301 | 0.9723580396501548 |", + "+----+-----------------------------+-----------------------------+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -666,35 +666,35 @@ async fn csv_query_group_by_two_columns() -> Result<()> { let sql = "SELECT c1, c2, MIN(c3) FROM aggregate_test_100 GROUP BY c1, c2"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+----+---------+", - "| c1 | c2 | MIN(c3) |", - "+----+----+---------+", - "| a | 1 | -85 |", - "| a | 2 | -48 |", - "| a | 3 | -72 |", - "| a | 4 | -101 |", - "| a | 5 | -101 |", - "| b | 1 | 12 |", - "| b | 2 | -60 |", - "| b | 3 | -101 |", - "| b | 4 | -117 |", - "| b | 5 | -82 |", - "| c | 1 | -24 |", - "| c | 2 | -117 |", - "| c | 3 | -2 |", - "| c | 4 | -90 |", - "| c | 5 | -94 |", - "| d | 1 | -99 |", - "| d | 2 | 93 |", - "| d | 3 | -76 |", - "| d | 4 | 5 |", - "| d | 5 | -59 |", - "| e | 1 | 36 |", - "| e | 2 | -61 |", - "| e | 3 | -95 |", - "| e | 4 | -56 |", - "| e | 5 | -86 |", - "+----+----+---------+", + "+----+----+----------------------------+", + "| c1 | c2 | MIN(aggregate_test_100.c3) |", + "+----+----+----------------------------+", + "| a | 1 | -85 |", + "| a | 2 | -48 |", + "| a | 3 | -72 |", + "| a | 4 | -101 |", + "| a | 5 | -101 |", + "| b | 1 | 12 |", + "| b | 2 | -60 |", + "| b | 3 | -101 |", + "| b | 4 | -117 |", + "| b | 5 | -82 |", + "| c | 1 | -24 |", + "| c | 2 | -117 |", + "| c | 3 | -2 |", + "| c | 4 | -90 |", + "| c | 5 | -94 |", + "| d | 1 | -99 |", + "| d | 2 | 93 |", + "| d | 3 | -76 |", + "| d | 4 | 5 |", + "| d | 5 | -59 |", + "| e | 1 | 36 |", + "| e | 2 | -61 |", + "| e | 3 | -95 |", + "| e | 4 | -56 |", + "| e | 5 | -86 |", + "+----+----+----------------------------+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -890,15 +890,15 @@ async fn csv_query_group_by_avg() -> Result<()> { let sql = "SELECT c1, avg(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+---------------------+", - "| c1 | AVG(c12) |", - "+----+---------------------+", - "| a | 0.48754517466109415 |", - "| b | 0.41040709263815384 |", - "| c | 0.6600456536439784 |", - "| d | 0.48855379387549824 |", - "| e | 0.48600669271341534 |", - "+----+---------------------+", + "+----+-----------------------------+", + "| c1 | AVG(aggregate_test_100.c12) |", + "+----+-----------------------------+", + "| a | 0.48754517466109415 |", + "| b | 0.41040709263815384 |", + "| c | 0.6600456536439784 |", + "| d | 0.48855379387549824 |", + "| e | 0.48600669271341534 |", + "+----+-----------------------------+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -911,15 +911,15 @@ async fn csv_query_group_by_avg_with_projection() -> Result<()> { let sql = "SELECT avg(c12), c1 FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+---------------------+----+", - "| AVG(c12) | c1 |", - "+---------------------+----+", - "| 0.41040709263815384 | b |", - "| 0.48600669271341534 | e |", - "| 0.48754517466109415 | a |", - "| 0.48855379387549824 | d |", - "| 0.6600456536439784 | c |", - "+---------------------+----+", + "+-----------------------------+----+", + "| AVG(aggregate_test_100.c12) | c1 |", + "+-----------------------------+----+", + "| 0.41040709263815384 | b |", + "| 0.48600669271341534 | e |", + "| 0.48754517466109415 | a |", + "| 0.48855379387549824 | d |", + "| 0.6600456536439784 | c |", + "+-----------------------------+----+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -975,11 +975,11 @@ async fn csv_query_count() -> Result<()> { let sql = "SELECT count(c12) FROM aggregate_test_100"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+------------+", - "| COUNT(c12) |", - "+------------+", - "| 100 |", - "+------------+", + "+-------------------------------+", + "| COUNT(aggregate_test_100.c12) |", + "+-------------------------------+", + "| 100 |", + "+-------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -1002,15 +1002,15 @@ async fn csv_query_window_with_empty_over() -> Result<()> { limit 5"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+", - "| c9 | COUNT(c5) | MAX(c5) | MIN(c5) | FIRST_VALUE(c5) | LAST_VALUE(c5) | NTH_VALUE(c5,Int64(2)) |", - "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+", - "| 28774375 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", - "| 63044568 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", - "| 141047417 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", - "| 141680161 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", - "| 145294611 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", - "+-----------+-----------+------------+-------------+-----------------+----------------+------------------------+", + "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", + "| c9 | COUNT(aggregate_test_100.c5) | MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) | FIRST_VALUE(aggregate_test_100.c5) | LAST_VALUE(aggregate_test_100.c5) | NTH_VALUE(aggregate_test_100.c5,Int64(2)) |", + "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", + "| 28774375 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", + "| 63044568 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", + "| 141047417 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", + "| 141680161 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", + "| 145294611 | 100 | 2143473091 | -2141999138 | 2033001162 | 61035129 | 706441268 |", + "+-----------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -1035,15 +1035,15 @@ async fn csv_query_window_with_partition_by() -> Result<()> { limit 5"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+", - "| c9 | SUM(CAST(c4 AS Int32)) | AVG(CAST(c4 AS Int32)) | COUNT(CAST(c4 AS Int32)) | MAX(CAST(c4 AS Int32)) | MIN(CAST(c4 AS Int32)) | FIRST_VALUE(CAST(c4 AS Int32)) | LAST_VALUE(CAST(c4 AS Int32)) | NTH_VALUE(CAST(c4 AS Int32),Int64(2)) |", - "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+", - "| 28774375 | -16110 | -16110 | 1 | -16110 | -16110 | -16110 | -16110 | |", - "| 63044568 | 3917 | 3917 | 1 | 3917 | 3917 | 3917 | 3917 | |", - "| 141047417 | -38455 | -19227.5 | 2 | -16974 | -21481 | -16974 | -21481 | |", - "| 141680161 | -1114 | -1114 | 1 | -1114 | -1114 | -1114 | -1114 | |", - "| 145294611 | 15673 | 15673 | 1 | 15673 | 15673 | 15673 | 15673 | |", - "+-----------+------------------------+------------------------+--------------------------+------------------------+------------------------+--------------------------------+-------------------------------+---------------------------------------+", + "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+", + "| c9 | SUM(CAST(aggregate_test_100.c4 AS Int32)) | AVG(CAST(aggregate_test_100.c4 AS Int32)) | COUNT(CAST(aggregate_test_100.c4 AS Int32)) | MAX(CAST(aggregate_test_100.c4 AS Int32)) | MIN(CAST(aggregate_test_100.c4 AS Int32)) | FIRST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) | LAST_VALUE(CAST(aggregate_test_100.c4 AS Int32)) | NTH_VALUE(CAST(aggregate_test_100.c4 AS Int32),Int64(2)) |", + "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+", + "| 28774375 | -16110 | -16110 | 1 | -16110 | -16110 | -16110 | -16110 | |", + "| 63044568 | 3917 | 3917 | 1 | 3917 | 3917 | 3917 | 3917 | |", + "| 141047417 | -38455 | -19227.5 | 2 | -16974 | -21481 | -16974 | -21481 | |", + "| 141680161 | -1114 | -1114 | 1 | -1114 | -1114 | -1114 | -1114 | |", + "| 145294611 | 15673 | 15673 | 1 | 15673 | 15673 | 15673 | 15673 | |", + "+-----------+-------------------------------------------+-------------------------------------------+---------------------------------------------+-------------------------------------------+-------------------------------------------+---------------------------------------------------+--------------------------------------------------+----------------------------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -1068,15 +1068,15 @@ async fn csv_query_window_with_order_by() -> Result<()> { limit 5"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+", - "| c9 | SUM(c5) | AVG(c5) | COUNT(c5) | MAX(c5) | MIN(c5) | FIRST_VALUE(c5) | LAST_VALUE(c5) | NTH_VALUE(c5,Int64(2)) |", - "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+", - "| 28774375 | 61035129 | 61035129 | 1 | 61035129 | 61035129 | 61035129 | 61035129 | |", - "| 63044568 | -47938237 | -23969118.5 | 2 | 61035129 | -108973366 | 61035129 | -108973366 | -108973366 |", - "| 141047417 | 575165281 | 191721760.33333334 | 3 | 623103518 | -108973366 | 61035129 | 623103518 | -108973366 |", - "| 141680161 | -1352462829 | -338115707.25 | 4 | 623103518 | -1927628110 | 61035129 | -1927628110 | -108973366 |", - "| 145294611 | -3251637940 | -650327588 | 5 | 623103518 | -1927628110 | 61035129 | -1899175111 | -108973366 |", - "+-----------+-------------+--------------------+-----------+-----------+-------------+-----------------+----------------+------------------------+", + "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", + "| c9 | SUM(aggregate_test_100.c5) | AVG(aggregate_test_100.c5) | COUNT(aggregate_test_100.c5) | MAX(aggregate_test_100.c5) | MIN(aggregate_test_100.c5) | FIRST_VALUE(aggregate_test_100.c5) | LAST_VALUE(aggregate_test_100.c5) | NTH_VALUE(aggregate_test_100.c5,Int64(2)) |", + "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", + "| 28774375 | 61035129 | 61035129 | 1 | 61035129 | 61035129 | 61035129 | 61035129 | |", + "| 63044568 | -47938237 | -23969118.5 | 2 | 61035129 | -108973366 | 61035129 | -108973366 | -108973366 |", + "| 141047417 | 575165281 | 191721760.33333334 | 3 | 623103518 | -108973366 | 61035129 | 623103518 | -108973366 |", + "| 141680161 | -1352462829 | -338115707.25 | 4 | 623103518 | -1927628110 | 61035129 | -1927628110 | -108973366 |", + "| 145294611 | -3251637940 | -650327588 | 5 | 623103518 | -1927628110 | 61035129 | -1899175111 | -108973366 |", + "+-----------+----------------------------+----------------------------+------------------------------+----------------------------+----------------------------+------------------------------------+-----------------------------------+-------------------------------------------+", ]; assert_batches_eq!(expected, &actual); Ok(()) @@ -1089,15 +1089,15 @@ async fn csv_query_group_by_int_count() -> Result<()> { let sql = "SELECT c1, count(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+------------+", - "| c1 | COUNT(c12) |", - "+----+------------+", - "| a | 21 |", - "| b | 19 |", - "| c | 21 |", - "| d | 18 |", - "| e | 21 |", - "+----+------------+", + "+----+-------------------------------+", + "| c1 | COUNT(aggregate_test_100.c12) |", + "+----+-------------------------------+", + "| a | 21 |", + "| b | 19 |", + "| c | 21 |", + "| d | 18 |", + "| e | 21 |", + "+----+-------------------------------+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -1131,15 +1131,15 @@ async fn csv_query_group_by_string_min_max() -> Result<()> { let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 GROUP BY c1"; let actual = execute_to_batches(&mut ctx, sql).await; let expected = vec![ - "+----+----------------------+--------------------+", - "| c1 | MIN(c12) | MAX(c12) |", - "+----+----------------------+--------------------+", - "| a | 0.02182578039211991 | 0.9800193410444061 |", - "| b | 0.04893135681998029 | 0.9185813970744787 |", - "| c | 0.0494924465469434 | 0.991517828651004 |", - "| d | 0.061029375346466685 | 0.9748360509016578 |", - "| e | 0.01479305307777301 | 0.9965400387585364 |", - "+----+----------------------+--------------------+", + "+----+-----------------------------+-----------------------------+", + "| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) |", + "+----+-----------------------------+-----------------------------+", + "| a | 0.02182578039211991 | 0.9800193410444061 |", + "| b | 0.04893135681998029 | 0.9185813970744787 |", + "| c | 0.0494924465469434 | 0.991517828651004 |", + "| d | 0.061029375346466685 | 0.9748360509016578 |", + "| e | 0.01479305307777301 | 0.9965400387585364 |", + "+----+-----------------------------+-----------------------------+", ]; assert_batches_sorted_eq!(expected, &actual); Ok(()) @@ -4304,11 +4304,11 @@ async fn test_physical_plan_display_indent() { "GlobalLimitExec: limit=10", " SortExec: [the_min@2 DESC]", " CoalescePartitionsExec", - " ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(c12), MIN(aggregate_test_100.c12)@2 as the_min]", - " HashAggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(c12), MIN(c12)]", + " ProjectionExec: expr=[c1@0 as c1, MAX(aggregate_test_100.c12)@1 as MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)@2 as the_min]", + " HashAggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]", " CoalesceBatchesExec: target_batch_size=4096", " RepartitionExec: partitioning=Hash([Column { name: \"c1\", index: 0 }], 3)", - " HashAggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(c12), MIN(c12)]", + " HashAggregateExec: mode=Partial, gby=[c1@0 as c1], aggr=[MAX(aggregate_test_100.c12), MIN(aggregate_test_100.c12)]", " CoalesceBatchesExec: target_batch_size=4096", " FilterExec: c12@1 < CAST(10 AS Float64)", " RepartitionExec: partitioning=RoundRobinBatch(3)", diff --git a/docs/specification/output-field-name-semantic.md b/docs/specification/output-field-name-semantic.md index 0407a17617e9..bc0813abd06b 100644 --- a/docs/specification/output-field-name-semantic.md +++ b/docs/specification/output-field-name-semantic.md @@ -25,10 +25,12 @@ Datafusion queries planned from both SQL queries and Dataframe APIs. ## Field name rules -- All field names MUST not contain relation/table qualifier. +- All bare column field names MUST not contain relation/table qualifier. - Both `SELECT t1.id`, `SELECT id` and `df.select_columns(&["id"])` SHOULD result in field name: `id` +- All compound column field names MUST contain relation/table qualifier. + - `SELECT foo + bar` SHOULD result in field name: `table.foo PLUS table.bar` - Function names MUST be converted to lowercase. - - `SELECT AVG(c1)` SHOULD result in field name: `avg(c1)` + - `SELECT AVG(c1)` SHOULD result in field name: `avg(table.c1)` - Literal string MUST not be wrapped with quotes or double quotes. - `SELECT 'foo'` SHOULD result in field name: `foo` - Operator expressions MUST be wrapped with parentheses. @@ -36,7 +38,7 @@ Datafusion queries planned from both SQL queries and Dataframe APIs. - Operator and operand MUST be separated by spaces. - `SELECT 1+2` SHOULD result in field name: `(1 + 2)` - Function arguments MUST be separated by a comma `,` and a space. - - `SELECT f(c1,c2)` and `df.select(vec![f.udf("f")?.call(vec![col("c1"), col("c2")])])` SHOULD result in field name: `f(c1, c2)` + - `SELECT f(c1,c2)` and `df.select(vec![f.udf("f")?.call(vec![col("c1"), col("c2")])])` SHOULD result in field name: `f(table.c1, table.c2)` ## Appendices @@ -95,10 +97,10 @@ SELECT ABS(t1.id), abs(-id) FROM t1; Datafusion Arrow record batches output: -| abs(id) | abs((- id)) | -| ------- | ----------- | -| 1 | 1 | -| 2 | 2 | +| abs(t1.id) | abs((- t1.id)) | +| ---------- | -------------- | +| 1 | 1 | +| 2 | 2 | Spark output: @@ -138,10 +140,10 @@ SELECT t1.id + ABS(id), ABS(id * t1.id) FROM t1; Datafusion Arrow record batches output: -| id + abs(id) | abs(id \* id) | -| ------------ | ------------- | -| 2 | 1 | -| 4 | 4 | +| t1.id + abs(t1.id) | abs(t1.id \* t1.id) | +| ------------------ | ------------------- | +| 2 | 1 | +| 4 | 4 | Spark output: