diff --git a/Cargo.lock b/Cargo.lock index 0e2dac279b91..cb581a1a0bcb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1969,6 +1969,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap 2.7.1", + "insta", "libc", "log", "object_store", @@ -2424,6 +2425,7 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", + "insta", "itertools 0.14.0", "log", "recursive", diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 52665df3751e..e21006312d85 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -322,7 +322,8 @@ fn extract_memory_pool_size(size: &str) -> Result { #[cfg(test)] mod tests { use super::*; - use datafusion::assert_batches_eq; + use datafusion::common::test_util::batches_to_string; + use insta::assert_snapshot; fn assert_conversion(input: &str, expected: Result) { let result = extract_memory_pool_size(input); @@ -391,21 +392,26 @@ mod tests { let df = ctx.sql(sql).await?; let rbs = df.collect().await?; - let excepted = [ - "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", - "| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |", - "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", - "| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | \"f0.list.item\" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |", - "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", - ]; - assert_batches_eq!(excepted, &rbs); + assert_snapshot!(batches_to_string(&rbs), @r#" + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + "#); // input with double quote let sql = "SELECT * FROM parquet_metadata(\"../datafusion/core/tests/data/fixed_size_list_array.parquet\")"; let df = ctx.sql(sql).await?; let rbs = df.collect().await?; - assert_batches_eq!(excepted, &rbs); + assert_snapshot!(batches_to_string(&rbs), @r#" + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + "#); Ok(()) } @@ -421,15 +427,13 @@ mod tests { let df = ctx.sql(sql).await?; let rbs = df.collect().await?; - let excepted = [ - -"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", -"| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |", -"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", -"| ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | \"String\" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 |", -"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+" - ]; - assert_batches_eq!(excepted, &rbs); + assert_snapshot!(batches_to_string(&rbs),@r#" + +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | + +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 | + +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ + "#); Ok(()) } diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index a607f796fc9c..39b47a96bccf 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -73,4 +73,5 @@ web-time = "1.1.0" [dev-dependencies] chrono = { workspace = true } +insta = { workspace = true } rand = { workspace = true } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 367f359ae742..2b758f456876 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -38,6 +38,7 @@ use crate::cast::{ as_fixed_size_binary_array, as_fixed_size_list_array, }; use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; +use crate::format::DEFAULT_CAST_OPTIONS; use crate::hash_utils::create_hashes; use crate::utils::SingleRowListArrayBuilder; use arrow::array::{ @@ -58,8 +59,6 @@ use arrow::datatypes::{ UInt8Type, UnionFields, UnionMode, DECIMAL128_MAX_PRECISION, }; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; - -use crate::format::DEFAULT_CAST_OPTIONS; use half::f16; pub use struct_builder::ScalarStructBuilder; @@ -3976,7 +3975,7 @@ mod tests { as_map_array, as_string_array, as_struct_array, as_uint32_array, as_uint64_array, }; - use crate::assert_batches_eq; + use crate::test_util::batches_to_string; use arrow::array::{types::Float64Type, NullBufferBuilder}; use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; @@ -3984,6 +3983,7 @@ mod tests { use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_columns; use chrono::NaiveDate; + use insta::assert_snapshot; use rand::Rng; #[test] @@ -6910,14 +6910,13 @@ mod tests { //verify compared to arrow display let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap(); - let expected = [ - "+-------------+", - "| s |", - "+-------------+", - "| {a: 1, b: } |", - "+-------------+", - ]; - assert_batches_eq!(&expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +-------------+ + | s | + +-------------+ + | {a: 1, b: } | + +-------------+ + "); } #[test] @@ -6946,14 +6945,13 @@ mod tests { //verify compared to arrow display let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap(); - let expected = [ - "+--------------+", - "| s |", - "+--------------+", - "| {a: 1, b: 2} |", - "+--------------+", - ]; - assert_batches_eq!(&expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +--------------+ + | s | + +--------------+ + | {a: 1, b: 2} | + +--------------+ + "); } #[test] @@ -6969,15 +6967,13 @@ mod tests { //verify compared to arrow display let batch = RecordBatch::try_from_iter(vec![("s", arr as _)]).unwrap(); - #[rustfmt::skip] - let expected = [ - "+---+", - "| s |", - "+---+", - "| |", - "+---+", - ]; - assert_batches_eq!(&expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +---+ + | s | + +---+ + | | + +---+ + "); } #[test] @@ -7011,17 +7007,16 @@ mod tests { //verify compared to arrow display let batch = RecordBatch::try_from_iter(vec![("m", arr as _)]).unwrap(); - let expected = [ - "+--------------------+", - "| m |", - "+--------------------+", - "| {joe: 1} |", - "| {blogs: 2, foo: 4} |", - "| {} |", - "| |", - "+--------------------+", - ]; - assert_batches_eq!(&expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +--------------------+ + | m | + +--------------------+ + | {joe: 1} | + | {blogs: 2, foo: 4} | + | {} | + | | + +--------------------+ + "); } #[test] diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index 3e7db1caa20f..15d6d21f038a 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -89,8 +89,9 @@ impl SessionContext { #[cfg(test)] mod tests { use super::*; - use crate::assert_batches_eq; use crate::test_util::{plan_and_collect, populate_csv_partitions}; + use datafusion_common::test_util::batches_to_string; + use insta::assert_snapshot; use tempfile::TempDir; @@ -115,14 +116,13 @@ mod tests { plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), count(*) FROM test").await?; assert_eq!(results.len(), 1); - let expected = [ - "+--------------+--------------+----------+", - "| sum(test.c1) | sum(test.c2) | count(*) |", - "+--------------+--------------+----------+", - "| 10 | 110 | 20 |", - "+--------------+--------------+----------+", - ]; - assert_batches_eq!(expected, &results); + assert_snapshot!(batches_to_string(&results), @r" + +--------------+--------------+----------+ + | sum(test.c1) | sum(test.c2) | count(*) | + +--------------+--------------+----------+ + | 10 | 110 | 20 | + +--------------+--------------+----------+ + "); Ok(()) } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index faf689179eca..714e94234a2c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -1828,7 +1828,6 @@ impl<'n> TreeNodeVisitor<'n> for BadPlanVisitor<'_> { #[cfg(test)] mod tests { use super::{super::options::CsvReadOptions, *}; - use crate::assert_batches_eq; use crate::execution::memory_pool::MemoryConsumer; use crate::test; use crate::test_util::{plan_and_collect, populate_csv_partitions}; @@ -1837,7 +1836,9 @@ mod tests { use std::error::Error; use std::path::PathBuf; + use datafusion_common::test_util::batches_to_string; use datafusion_common_runtime::SpawnedTask; + use insta::{allow_duplicates, assert_snapshot}; use crate::catalog::SchemaProvider; use crate::execution::session_state::SessionStateBuilder; @@ -1900,14 +1901,13 @@ mod tests { plan_and_collect(&ctx, "SELECT @@version, @name, @integer + 1 FROM dual") .await?; - let expected = [ - "+----------------------+------------------------+---------------------+", - "| @@version | @name | @integer + Int64(1) |", - "+----------------------+------------------------+---------------------+", - "| system-var-@@version | user-defined-var-@name | 42 |", - "+----------------------+------------------------+---------------------+", - ]; - assert_batches_eq!(expected, &results); + assert_snapshot!(batches_to_string(&results), @r" + +----------------------+------------------------+---------------------+ + | @@version | @name | @integer + Int64(1) | + +----------------------+------------------------+---------------------+ + | system-var-@@version | user-defined-var-@name | 42 | + +----------------------+------------------------+---------------------+ + "); Ok(()) } @@ -1988,14 +1988,15 @@ mod tests { let actual = arrow::util::pretty::pretty_format_batches(&result) .unwrap() .to_string(); - let expected = r#"+--------------------+ -| c_name | -+--------------------+ -| Customer#000000002 | -| Customer#000000003 | -| Customer#000000004 | -+--------------------+"#; - assert_eq!(actual, expected); + assert_snapshot!(actual, @r" + +--------------------+ + | c_name | + +--------------------+ + | Customer#000000002 | + | Customer#000000003 | + | Customer#000000004 | + +--------------------+ + "); Ok(()) } @@ -2020,14 +2021,15 @@ mod tests { let actual = arrow::util::pretty::pretty_format_batches(&result) .unwrap() .to_string(); - let expected = r#"+--------------------+ -| c_name | -+--------------------+ -| Customer#000000002 | -| Customer#000000003 | -| Customer#000000004 | -+--------------------+"#; - assert_eq!(actual, expected); + assert_snapshot!(actual, @r" + +--------------------+ + | c_name | + +--------------------+ + | Customer#000000002 | + | Customer#000000003 | + | Customer#000000004 | + +--------------------+ + "); Ok(()) } @@ -2110,6 +2112,8 @@ mod tests { .unwrap(); ctx.register_catalog("my_catalog", Arc::new(catalog)); + let mut results = Vec::new(); + for table_ref in &["my_catalog.my_schema.test", "my_schema.test", "test"] { let result = plan_and_collect( &ctx, @@ -2118,14 +2122,18 @@ mod tests { .await .unwrap(); - let expected = [ - "+-------+", - "| count |", - "+-------+", - "| 1 |", - "+-------+", - ]; - assert_batches_eq!(expected, &result); + results.push(result); + } + allow_duplicates! { + for result in &results { + assert_snapshot!(batches_to_string(result), @r" + +-------+ + | count | + +-------+ + | 1 | + +-------+ + "); + } } } @@ -2160,15 +2168,14 @@ mod tests { ) .await?; - let expected = [ - "+-----+-------+", - "| cat | total |", - "+-----+-------+", - "| a | 1 |", - "| b | 3 |", - "+-----+-------+", - ]; - assert_batches_eq!(expected, &result); + assert_snapshot!(batches_to_string(&result), @r" + +-----+-------+ + | cat | total | + +-----+-------+ + | a | 1 | + | b | 3 | + +-----+-------+ + "); Ok(()) } @@ -2257,14 +2264,13 @@ mod tests { .await? .collect() .await?; - let expected = [ - "+-----------------------------+", - "| Utf8(\"2021-01-01 00:00:00\") |", - "+-----------------------------+", - "| 2021-01-01T00:00:00 |", - "+-----------------------------+", - ]; - assert_batches_eq!(expected, &result); + assert_snapshot!(batches_to_string(&result), @r#" + +-----------------------------+ + | Utf8("2021-01-01 00:00:00") | + +-----------------------------+ + | 2021-01-01T00:00:00 | + +-----------------------------+ + "#); Ok(()) } #[test] diff --git a/datafusion/core/tests/catalog/memory.rs b/datafusion/core/tests/catalog/memory.rs index 3e45fb753226..b0753eb5c949 100644 --- a/datafusion/core/tests/catalog/memory.rs +++ b/datafusion/core/tests/catalog/memory.rs @@ -24,7 +24,8 @@ use datafusion::datasource::listing::{ use datafusion::prelude::SessionContext; use datafusion_catalog::memory::*; use datafusion_catalog::{SchemaProvider, TableProvider}; -use datafusion_common::assert_batches_eq; +use datafusion_common::test_util::batches_to_string; +use insta::assert_snapshot; use std::any::Any; use std::sync::Arc; @@ -152,19 +153,18 @@ async fn test_schema_register_listing_table() { let actual = df.collect().await.unwrap(); - let expected = [ - "+----+----------+", - "| id | bool_col |", - "+----+----------+", - "| 4 | true |", - "| 5 | false |", - "| 6 | true |", - "| 7 | false |", - "| 2 | true |", - "| 3 | false |", - "| 0 | true |", - "| 1 | false |", - "+----+----------+", - ]; - assert_batches_eq!(expected, &actual); + assert_snapshot!(batches_to_string(&actual), @r" + +----+----------+ + | id | bool_col | + +----+----------+ + | 4 | true | + | 5 | false | + | 6 | true | + | 7 | false | + | 2 | true | + | 3 | false | + | 0 | true | + | 1 | false | + +----+----------+ + "); } diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index b12b3be2d435..4a4059db2547 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -23,7 +23,6 @@ use std::time::SystemTime; use arrow::array::{ArrayRef, Int64Array, Int8Array, StringArray}; use arrow::datatypes::{Field, Schema, SchemaBuilder}; use arrow::record_batch::RecordBatch; -use datafusion::assert_batches_sorted_eq; use datafusion::datasource::file_format::parquet::fetch_parquet_metadata; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::object_store::ObjectStoreUrl; @@ -33,11 +32,13 @@ use datafusion::datasource::physical_plan::{ use datafusion::physical_plan::collect; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::prelude::SessionContext; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; use bytes::Bytes; use futures::future::BoxFuture; use futures::{FutureExt, TryFutureExt}; +use insta::assert_snapshot; use object_store::memory::InMemory; use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; @@ -96,17 +97,15 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { let task_ctx = session_ctx.task_ctx(); let read = collect(parquet_exec, task_ctx).await.unwrap(); - let expected = [ - "+-----+----+----+", - "| c1 | c2 | c3 |", - "+-----+----+----+", - "| Foo | 1 | 10 |", - "| | 2 | 20 |", - "| bar | | |", - "+-----+----+----+", - ]; - - assert_batches_sorted_eq!(expected, &read); + assert_snapshot!(batches_to_sort_string(&read), @r" + +-----+----+----+ + | c1 | c2 | c3 | + +-----+----+----+ + | | 2 | 20 | + | Foo | 1 | 10 | + | bar | | | + +-----+----+----+ + "); } #[derive(Debug)] diff --git a/datafusion/core/tests/parquet/schema.rs b/datafusion/core/tests/parquet/schema.rs index e13fbad24426..29afd3970432 100644 --- a/datafusion/core/tests/parquet/schema.rs +++ b/datafusion/core/tests/parquet/schema.rs @@ -21,7 +21,8 @@ use std::{collections::HashMap, fs, path::Path}; use tempfile::TempDir; use super::*; -use datafusion_common::assert_batches_sorted_eq; +use datafusion_common::test_util::batches_to_sort_string; +use insta::assert_snapshot; #[tokio::test] async fn schema_merge_ignores_metadata_by_default() { @@ -57,20 +58,6 @@ async fn schema_merge_ignores_metadata_by_default() { ]; write_files(table_dir.as_path(), schemas); - // can be any order - let expected = [ - "+----+------+", - "| id | name |", - "+----+------+", - "| 1 | test |", - "| 2 | test |", - "| 3 | test |", - "| 0 | test |", - "| 5 | test |", - "| 4 | test |", - "+----+------+", - ]; - // Read the parquet files into a dataframe to confirm results // (no errors) let table_path = table_dir.to_str().unwrap().to_string(); @@ -82,7 +69,18 @@ async fn schema_merge_ignores_metadata_by_default() { .unwrap(); let actual = df.collect().await.unwrap(); - assert_batches_sorted_eq!(expected, &actual); + assert_snapshot!(batches_to_sort_string(&actual), @r" + +----+------+ + | id | name | + +----+------+ + | 0 | test | + | 1 | test | + | 2 | test | + | 3 | test | + | 4 | test | + | 5 | test | + +----+------+ + "); assert_no_metadata(&actual); // also validate it works via SQL interface as well @@ -97,7 +95,18 @@ async fn schema_merge_ignores_metadata_by_default() { .collect() .await .unwrap(); - assert_batches_sorted_eq!(expected, &actual); + assert_snapshot!(batches_to_sort_string(&actual), @r" + +----+------+ + | id | name | + +----+------+ + | 0 | test | + | 1 | test | + | 2 | test | + | 3 | test | + | 4 | test | + | 5 | test | + +----+------+ + "); assert_no_metadata(&actual); } @@ -124,17 +133,6 @@ async fn schema_merge_can_preserve_metadata() { ]; write_files(table_dir.as_path(), schemas); - // can be any order - let expected = [ - "+----+------+", - "| id | name |", - "+----+------+", - "| 1 | test |", - "| 2 | test |", - "| 0 | test |", - "+----+------+", - ]; - let mut expected_metadata = make_meta("foo", "bar"); expected_metadata.insert("foo2".into(), "baz".into()); @@ -153,7 +151,15 @@ async fn schema_merge_can_preserve_metadata() { let actual = df.collect().await.unwrap(); - assert_batches_sorted_eq!(expected, &actual); + assert_snapshot!(batches_to_sort_string(&actual), @r" + +----+------+ + | id | name | + +----+------+ + | 0 | test | + | 1 | test | + | 2 | test | + +----+------+ + "); assert_metadata(&actual, &expected_metadata); // also validate it works via SQL interface as well @@ -167,7 +173,15 @@ async fn schema_merge_can_preserve_metadata() { assert_eq!(actual.clone(), expected_metadata); let actual = df.collect().await.unwrap(); - assert_batches_sorted_eq!(expected, &actual); + assert_snapshot!(batches_to_sort_string(&actual), @r" + +----+------+ + | id | name | + +----+------+ + | 0 | test | + | 1 | test | + | 2 | test | + +----+------+ + "); assert_metadata(&actual, &expected_metadata); } diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index bb20246bf9d5..85bc1104795f 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -22,14 +22,15 @@ use arrow::array::{ StringArray, }; use arrow::datatypes::{DataType, Field, Schema}; -use datafusion::assert_batches_sorted_eq; use datafusion::datasource::physical_plan::{FileScanConfig, ParquetSource}; use datafusion::physical_plan::collect; use datafusion::prelude::SessionContext; use datafusion::test::object_store::local_unpartitioned_file; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::Result; use datafusion_execution::object_store::ObjectStoreUrl; +use insta::assert_snapshot; use object_store::ObjectMeta; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; @@ -70,19 +71,18 @@ async fn multi_parquet_coercion() { let task_ctx = session_ctx.task_ctx(); let read = collect(parquet_exec, task_ctx).await.unwrap(); - let expected = [ - "+-------+----+------+", - "| c1 | c2 | c3 |", - "+-------+----+------+", - "| | | |", - "| | 1 | 10.0 |", - "| | 2 | |", - "| | 2 | 20.0 |", - "| one | 1 | |", - "| three | | |", - "+-------+----+------+", - ]; - assert_batches_sorted_eq!(expected, &read); + assert_snapshot!(batches_to_sort_string(&read), @r" + +-------+----+------+ + | c1 | c2 | c3 | + +-------+----+------+ + | | | | + | | 1 | 10.0 | + | | 2 | | + | | 2 | 20.0 | + | one | 1 | | + | three | | | + +-------+----+------+ + "); } #[tokio::test] @@ -127,19 +127,18 @@ async fn multi_parquet_coercion_projection() { let task_ctx = session_ctx.task_ctx(); let read = collect(parquet_exec, task_ctx).await.unwrap(); - let expected = [ - "+----+-------+------+", - "| c2 | c1 | c3 |", - "+----+-------+------+", - "| | foo | |", - "| | three | |", - "| 1 | baz | 10.0 |", - "| 1 | one | |", - "| 2 | | |", - "| 2 | Boo | 20.0 |", - "+----+-------+------+", - ]; - assert_batches_sorted_eq!(expected, &read); + assert_snapshot!(batches_to_sort_string(&read), @r" + +----+-------+------+ + | c2 | c1 | c3 | + +----+-------+------+ + | | foo | | + | | three | | + | 1 | baz | 10.0 | + | 1 | one | | + | 2 | | | + | 2 | Boo | 20.0 | + +----+-------+------+ + "); } /// Writes `batches` to a temporary parquet file diff --git a/datafusion/core/tests/sql/aggregates.rs b/datafusion/core/tests/sql/aggregates.rs index 7b1f349e15b5..52372e01d41a 100644 --- a/datafusion/core/tests/sql/aggregates.rs +++ b/datafusion/core/tests/sql/aggregates.rs @@ -72,14 +72,13 @@ async fn count_partitioned() -> Result<()> { execute_with_partition("SELECT count(c1), count(c2) FROM test", 4).await?; assert_eq!(results.len(), 1); - let expected = [ - "+----------------+----------------+", - "| count(test.c1) | count(test.c2) |", - "+----------------+----------------+", - "| 40 | 40 |", - "+----------------+----------------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----------------+----------------+ + | count(test.c1) | count(test.c2) | + +----------------+----------------+ + | 40 | 40 | + +----------------+----------------+ + "); Ok(()) } @@ -88,17 +87,16 @@ async fn count_aggregated() -> Result<()> { let results = execute_with_partition("SELECT c1, count(c2) FROM test GROUP BY c1", 4).await?; - let expected = [ - "+----+----------------+", - "| c1 | count(test.c2) |", - "+----+----------------+", - "| 0 | 10 |", - "| 1 | 10 |", - "| 2 | 10 |", - "| 3 | 10 |", - "+----+----------------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----+----------------+ + | c1 | count(test.c2) | + +----+----------------+ + | 0 | 10 | + | 1 | 10 | + | 2 | 10 | + | 3 | 10 | + +----+----------------+ + "); Ok(()) } @@ -110,68 +108,67 @@ async fn count_aggregated_cube() -> Result<()> { ) .await?; - let expected = vec![ - "+----+----+----------------+", - "| c1 | c2 | count(test.c3) |", - "+----+----+----------------+", - "| | | 40 |", - "| | 1 | 4 |", - "| | 10 | 4 |", - "| | 2 | 4 |", - "| | 3 | 4 |", - "| | 4 | 4 |", - "| | 5 | 4 |", - "| | 6 | 4 |", - "| | 7 | 4 |", - "| | 8 | 4 |", - "| | 9 | 4 |", - "| 0 | | 10 |", - "| 0 | 1 | 1 |", - "| 0 | 10 | 1 |", - "| 0 | 2 | 1 |", - "| 0 | 3 | 1 |", - "| 0 | 4 | 1 |", - "| 0 | 5 | 1 |", - "| 0 | 6 | 1 |", - "| 0 | 7 | 1 |", - "| 0 | 8 | 1 |", - "| 0 | 9 | 1 |", - "| 1 | | 10 |", - "| 1 | 1 | 1 |", - "| 1 | 10 | 1 |", - "| 1 | 2 | 1 |", - "| 1 | 3 | 1 |", - "| 1 | 4 | 1 |", - "| 1 | 5 | 1 |", - "| 1 | 6 | 1 |", - "| 1 | 7 | 1 |", - "| 1 | 8 | 1 |", - "| 1 | 9 | 1 |", - "| 2 | | 10 |", - "| 2 | 1 | 1 |", - "| 2 | 10 | 1 |", - "| 2 | 2 | 1 |", - "| 2 | 3 | 1 |", - "| 2 | 4 | 1 |", - "| 2 | 5 | 1 |", - "| 2 | 6 | 1 |", - "| 2 | 7 | 1 |", - "| 2 | 8 | 1 |", - "| 2 | 9 | 1 |", - "| 3 | | 10 |", - "| 3 | 1 | 1 |", - "| 3 | 10 | 1 |", - "| 3 | 2 | 1 |", - "| 3 | 3 | 1 |", - "| 3 | 4 | 1 |", - "| 3 | 5 | 1 |", - "| 3 | 6 | 1 |", - "| 3 | 7 | 1 |", - "| 3 | 8 | 1 |", - "| 3 | 9 | 1 |", - "+----+----+----------------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----+----+----------------+ + | c1 | c2 | count(test.c3) | + +----+----+----------------+ + | | | 40 | + | | 1 | 4 | + | | 10 | 4 | + | | 2 | 4 | + | | 3 | 4 | + | | 4 | 4 | + | | 5 | 4 | + | | 6 | 4 | + | | 7 | 4 | + | | 8 | 4 | + | | 9 | 4 | + | 0 | | 10 | + | 0 | 1 | 1 | + | 0 | 10 | 1 | + | 0 | 2 | 1 | + | 0 | 3 | 1 | + | 0 | 4 | 1 | + | 0 | 5 | 1 | + | 0 | 6 | 1 | + | 0 | 7 | 1 | + | 0 | 8 | 1 | + | 0 | 9 | 1 | + | 1 | | 10 | + | 1 | 1 | 1 | + | 1 | 10 | 1 | + | 1 | 2 | 1 | + | 1 | 3 | 1 | + | 1 | 4 | 1 | + | 1 | 5 | 1 | + | 1 | 6 | 1 | + | 1 | 7 | 1 | + | 1 | 8 | 1 | + | 1 | 9 | 1 | + | 2 | | 10 | + | 2 | 1 | 1 | + | 2 | 10 | 1 | + | 2 | 2 | 1 | + | 2 | 3 | 1 | + | 2 | 4 | 1 | + | 2 | 5 | 1 | + | 2 | 6 | 1 | + | 2 | 7 | 1 | + | 2 | 8 | 1 | + | 2 | 9 | 1 | + | 3 | | 10 | + | 3 | 1 | 1 | + | 3 | 10 | 1 | + | 3 | 2 | 1 | + | 3 | 3 | 1 | + | 3 | 4 | 1 | + | 3 | 5 | 1 | + | 3 | 6 | 1 | + | 3 | 7 | 1 | + | 3 | 8 | 1 | + | 3 | 9 | 1 | + +----+----+----------------+ + "); Ok(()) } @@ -259,14 +256,15 @@ async fn count_distinct_integers_aggregated_single_partition() -> Result<()> { let results = run_count_distinct_integers_aggregated_scenario(partitions).await?; - let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", - "| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |", - "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", - "| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", - "| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", - "| c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |", - "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+"]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + | c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) | + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + | a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | + | b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | + | c | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + "); Ok(()) } @@ -283,14 +281,15 @@ async fn count_distinct_integers_aggregated_multiple_partitions() -> Result<()> let results = run_count_distinct_integers_aggregated_scenario(partitions).await?; - let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", - "| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |", - "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+", - "| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |", - "| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |", - "| c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |", - "+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+"]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + | c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) | + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + | a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | + | b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | + | c | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | + +---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+ + "); Ok(()) } @@ -308,16 +307,17 @@ async fn test_accumulator_row_accumulator() -> Result<()> { LIMIT 5"; let actual = execute_to_batches(&ctx, sql).await; - let expected = ["+----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+", - "| c1 | c2 | min1 | min2 | max1 | max2 | avg1 | min3 | cnt1 | sum1 |", - "+----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+", - "| a | 1 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | 774637006 | waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs | 4015442341 | 2437927011.0 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | 5 | 6094771121.5 |", - "| a | 2 | b3b9esRhTzFEawbs6XhpKnD9ojutHB | 145294611 | ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8 | 3717551163 | 2267588664.0 | b3b9esRhTzFEawbs6XhpKnD9ojutHB | 3 | 3401364777.0 |", - "| a | 3 | Amn2K87Db5Es3dFQO9cw9cvpAM6h35 | 431948861 | oLZ21P2JEDooxV1pU31cIxQHEeeoLu | 3998790955 | 2225685115.1666665 | Amn2K87Db5Es3dFQO9cw9cvpAM6h35 | 6 | 6676994872.5 |", - "| a | 4 | KJFcmTVjdkCMv94wYCtfHMFhzyRsmH | 466439833 | ydkwycaISlYSlEq3TlkS2m15I2pcp8 | 2502326480 | 1655431654.0 | KJFcmTVjdkCMv94wYCtfHMFhzyRsmH | 4 | 3310812222.5 |", - "| a | 5 | MeSTAXq8gVxVjbEjgkvU9YLte0X9uE | 141047417 | QJYm7YRA3YetcBHI5wkMZeLXVmfuNy | 2496054700 | 1216992989.6666667 | MeSTAXq8gVxVjbEjgkvU9YLte0X9uE | 3 | 1825431770.0 |", - "+----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+"]; - assert_batches_eq!(expected, &actual); + assert_snapshot!(batches_to_sort_string(&actual), @r" + +----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+ + | c1 | c2 | min1 | min2 | max1 | max2 | avg1 | min3 | cnt1 | sum1 | + +----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+ + | a | 1 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | 774637006 | waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs | 4015442341 | 2437927011.0 | 0keZ5G8BffGwgF2RwQD59TFzMStxCB | 5 | 6094771121.5 | + | a | 2 | b3b9esRhTzFEawbs6XhpKnD9ojutHB | 145294611 | ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8 | 3717551163 | 2267588664.0 | b3b9esRhTzFEawbs6XhpKnD9ojutHB | 3 | 3401364777.0 | + | a | 3 | Amn2K87Db5Es3dFQO9cw9cvpAM6h35 | 431948861 | oLZ21P2JEDooxV1pU31cIxQHEeeoLu | 3998790955 | 2225685115.1666665 | Amn2K87Db5Es3dFQO9cw9cvpAM6h35 | 6 | 6676994872.5 | + | a | 4 | KJFcmTVjdkCMv94wYCtfHMFhzyRsmH | 466439833 | ydkwycaISlYSlEq3TlkS2m15I2pcp8 | 2502326480 | 1655431654.0 | KJFcmTVjdkCMv94wYCtfHMFhzyRsmH | 4 | 3310812222.5 | + | a | 5 | MeSTAXq8gVxVjbEjgkvU9YLte0X9uE | 141047417 | QJYm7YRA3YetcBHI5wkMZeLXVmfuNy | 2496054700 | 1216992989.6666667 | MeSTAXq8gVxVjbEjgkvU9YLte0X9uE | 3 | 1825431770.0 | + +----+----+--------------------------------+-----------+--------------------------------+------------+--------------------+--------------------------------+------+--------------+ + "); Ok(()) } diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 03c4ad7c013e..579049692e7d 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -30,10 +30,11 @@ use datafusion::physical_plan::ExecutionPlan; use datafusion::physical_plan::ExecutionPlanVisitor; use datafusion::prelude::*; use datafusion::test_util; -use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::utils::get_available_parallelism; use datafusion_common::{assert_contains, assert_not_contains}; +use insta::assert_snapshot; use object_store::path::Path; use std::fs::File; use std::io::Write; diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index c88051d5c9ef..46aecd1dc070 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -28,7 +28,6 @@ use datafusion::datasource::listing::ListingTableUrl; use datafusion::datasource::physical_plan::{FileScanConfig, ParquetSource}; use datafusion::datasource::source::DataSourceExec; use datafusion::{ - assert_batches_sorted_eq, datasource::{ file_format::{csv::CsvFormat, parquet::ParquetFormat}, listing::{ListingOptions, ListingTable, ListingTableConfig}, @@ -40,6 +39,7 @@ use datafusion::{ }; use datafusion_catalog::TableProvider; use datafusion_common::stats::Precision; +use datafusion_common::test_util::batches_to_sort_string; use datafusion_common::ScalarValue; use datafusion_execution::config::SessionConfig; use datafusion_expr::{col, lit, Expr, Operator}; @@ -49,6 +49,7 @@ use async_trait::async_trait; use bytes::Bytes; use chrono::{TimeZone, Utc}; use futures::stream::{self, BoxStream}; +use insta::assert_snapshot; use object_store::{ path::Path, GetOptions, GetResult, GetResultPayload, ListResult, ObjectMeta, ObjectStore, PutOptions, PutResult, @@ -145,16 +146,15 @@ async fn parquet_distinct_partition_col() -> Result<()> { .collect() .await?; - let expected = [ - "+------+-------+-----+", - "| year | month | day |", - "+------+-------+-----+", - "| 2021 | 09 | 09 |", - "| 2021 | 10 | 09 |", - "| 2021 | 10 | 28 |", - "+------+-------+-----+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +------+-------+-----+ + | year | month | day | + +------+-------+-----+ + | 2021 | 09 | 09 | + | 2021 | 10 | 09 | + | 2021 | 10 | 28 | + +------+-------+-----+ + "); //Test that the number of rows returned by partition column scan and actually reading the parquet file are the same let actual_row_count: usize = ctx .sql("SELECT id from t") @@ -275,18 +275,17 @@ async fn csv_filter_with_file_col() -> Result<()> { .collect() .await?; - let expected = [ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| a | 1 |", - "| b | 1 |", - "| b | 5 |", - "| c | 2 |", - "| d | 5 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +----+----+ + | c1 | c2 | + +----+----+ + | a | 1 | + | b | 1 | + | b | 5 | + | c | 2 | + | d | 5 | + +----+----+ + "); Ok(()) } @@ -313,18 +312,17 @@ async fn csv_filter_with_file_nonstring_col() -> Result<()> { .collect() .await?; - let expected = [ - "+----+----+------------+", - "| c1 | c2 | date |", - "+----+----+------------+", - "| a | 1 | 2021-10-28 |", - "| b | 1 | 2021-10-28 |", - "| b | 5 | 2021-10-28 |", - "| c | 2 | 2021-10-28 |", - "| d | 5 | 2021-10-28 |", - "+----+----+------------+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +----+----+------------+ + | c1 | c2 | date | + +----+----+------------+ + | a | 1 | 2021-10-28 | + | b | 1 | 2021-10-28 | + | b | 5 | 2021-10-28 | + | c | 2 | 2021-10-28 | + | d | 5 | 2021-10-28 | + +----+----+------------+ + "); Ok(()) } @@ -351,18 +349,17 @@ async fn csv_projection_on_partition() -> Result<()> { .collect() .await?; - let expected = [ - "+----+------------+", - "| c1 | date |", - "+----+------------+", - "| a | 2021-10-27 |", - "| b | 2021-10-27 |", - "| b | 2021-10-27 |", - "| c | 2021-10-27 |", - "| d | 2021-10-27 |", - "+----+------------+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +----+------------+ + | c1 | date | + +----+------------+ + | a | 2021-10-27 | + | b | 2021-10-27 | + | b | 2021-10-27 | + | c | 2021-10-27 | + | d | 2021-10-27 | + +----+------------+ + "); Ok(()) } @@ -390,15 +387,14 @@ async fn csv_grouping_by_partition() -> Result<()> { .collect() .await?; - let expected = [ - "+------------+----------+----------------------+", - "| date | count(*) | count(DISTINCT t.c1) |", - "+------------+----------+----------------------+", - "| 2021-10-26 | 100 | 5 |", - "| 2021-10-27 | 100 | 5 |", - "+------------+----------+----------------------+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +------------+----------+----------------------+ + | date | count(*) | count(DISTINCT t.c1) | + +------------+----------+----------------------+ + | 2021-10-26 | 100 | 5 | + | 2021-10-27 | 100 | 5 | + +------------+----------+----------------------+ + "); Ok(()) } @@ -430,21 +426,20 @@ async fn parquet_multiple_partitions() -> Result<()> { .collect() .await?; - let expected = [ - "+----+-----+", - "| id | day |", - "+----+-----+", - "| 0 | 09 |", - "| 1 | 09 |", - "| 2 | 09 |", - "| 3 | 09 |", - "| 4 | 09 |", - "| 5 | 09 |", - "| 6 | 09 |", - "| 7 | 09 |", - "+----+-----+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +----+-----+ + | id | day | + +----+-----+ + | 0 | 09 | + | 1 | 09 | + | 2 | 09 | + | 3 | 09 | + | 4 | 09 | + | 5 | 09 | + | 6 | 09 | + | 7 | 09 | + +----+-----+ + "); Ok(()) } @@ -476,21 +471,20 @@ async fn parquet_multiple_nonstring_partitions() -> Result<()> { .collect() .await?; - let expected = [ - "+----+-----+", - "| id | day |", - "+----+-----+", - "| 0 | 9 |", - "| 1 | 9 |", - "| 2 | 9 |", - "| 3 | 9 |", - "| 4 | 9 |", - "| 5 | 9 |", - "| 6 | 9 |", - "| 7 | 9 |", - "+----+-----+", - ]; - assert_batches_sorted_eq!(expected, &result); + assert_snapshot!(batches_to_sort_string(&result), @r" + +----+-----+ + | id | day | + +----+-----+ + | 0 | 9 | + | 1 | 9 | + | 2 | 9 | + | 3 | 9 | + | 4 | 9 | + | 5 | 9 | + | 6 | 9 | + | 7 | 9 | + +----+-----+ + "); Ok(()) } diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 6e81bf6410c1..f874dd7c0842 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -30,23 +30,22 @@ async fn test_list_query_parameters() -> Result<()> { .with_param_values(vec![ScalarValue::from(3i32)])? .collect() .await?; - let expected = vec![ - "+----+----+-------+", - "| c1 | c2 | c3 |", - "+----+----+-------+", - "| 3 | 1 | false |", - "| 3 | 10 | true |", - "| 3 | 2 | true |", - "| 3 | 3 | false |", - "| 3 | 4 | true |", - "| 3 | 5 | false |", - "| 3 | 6 | true |", - "| 3 | 7 | false |", - "| 3 | 8 | true |", - "| 3 | 9 | false |", - "+----+----+-------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----+----+-------+ + | c1 | c2 | c3 | + +----+----+-------+ + | 3 | 1 | false | + | 3 | 10 | true | + | 3 | 2 | true | + | 3 | 3 | false | + | 3 | 4 | true | + | 3 | 5 | false | + | 3 | 6 | true | + | 3 | 7 | false | + | 3 | 8 | true | + | 3 | 9 | false | + +----+----+-------+ + "); Ok(()) } @@ -66,33 +65,32 @@ async fn test_named_query_parameters() -> Result<()> { ])? .collect() .await?; - let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 1 | 10 |", - "| 2 | 1 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "| 2 | 10 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----+----+ + | c1 | c2 | + +----+----+ + | 1 | 1 | + | 1 | 10 | + | 1 | 2 | + | 1 | 3 | + | 1 | 4 | + | 1 | 5 | + | 1 | 6 | + | 1 | 7 | + | 1 | 8 | + | 1 | 9 | + | 2 | 1 | + | 2 | 10 | + | 2 | 2 | + | 2 | 3 | + | 2 | 4 | + | 2 | 5 | + | 2 | 6 | + | 2 | 7 | + | 2 | 8 | + | 2 | 9 | + +----+----+ + "); Ok(()) } @@ -114,33 +112,32 @@ async fn test_prepare_statement() -> Result<()> { let dataframe = dataframe.with_param_values(param_values)?; let results = dataframe.collect().await?; - let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 10 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 2 | 1 |", - "| 2 | 10 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +----+----+ + | c1 | c2 | + +----+----+ + | 1 | 1 | + | 1 | 10 | + | 1 | 2 | + | 1 | 3 | + | 1 | 4 | + | 1 | 5 | + | 1 | 6 | + | 1 | 7 | + | 1 | 8 | + | 1 | 9 | + | 2 | 1 | + | 2 | 10 | + | 2 | 2 | + | 2 | 3 | + | 2 | 4 | + | 2 | 5 | + | 2 | 6 | + | 2 | 7 | + | 2 | 8 | + | 2 | 9 | + +----+----+ + "); Ok(()) } @@ -164,14 +161,13 @@ async fn prepared_statement_type_coercion() -> Result<()> { ])? .collect() .await?; - let expected = [ - "+--------+----------+", - "| signed | unsigned |", - "+--------+----------+", - "| -1 | 1 |", - "+--------+----------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +--------+----------+ + | signed | unsigned | + +--------+----------+ + | -1 | 1 | + +--------+----------+ + "); Ok(()) } @@ -194,14 +190,13 @@ async fn test_parameter_type_coercion() -> Result<()> { ("str", ScalarValue::from("1")), ])? .collect().await?; - let expected = [ - "+--------+----------+", - "| signed | unsigned |", - "+--------+----------+", - "| -1 | 1 |", - "+--------+----------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +--------+----------+ + | signed | unsigned | + +--------+----------+ + | -1 | 1 | + +--------+----------+ + "); Ok(()) } @@ -263,14 +258,13 @@ async fn test_positional_parameter_not_bound() -> Result<()> { .collect() .await?; - let expected = [ - "+--------+----------+", - "| signed | unsigned |", - "+--------+----------+", - "| -1 | 1 |", - "+--------+----------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +--------+----------+ + | signed | unsigned | + +--------+----------+ + | -1 | 1 | + +--------+----------+ + "); Ok(()) } @@ -309,14 +303,13 @@ async fn test_named_parameter_not_bound() -> Result<()> { .collect() .await?; - let expected = [ - "+--------+----------+", - "| signed | unsigned |", - "+--------+----------+", - "| -1 | 1 |", - "+--------+----------+", - ]; - assert_batches_sorted_eq!(expected, &results); + assert_snapshot!(batches_to_sort_string(&results), @r" + +--------+----------+ + | signed | unsigned | + +--------+----------+ + | -1 | 1 | + +--------+----------+ + "); Ok(()) } diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index e8473e6556d1..aaadb09bcc98 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -53,3 +53,4 @@ recursive = { workspace = true, optional = true } [dev-dependencies] datafusion-expr = { workspace = true } datafusion-functions-nested = { workspace = true } +insta = { workspace = true } diff --git a/datafusion/physical-optimizer/src/pruning.rs b/datafusion/physical-optimizer/src/pruning.rs index 2004aeafb893..b5287f3d33f3 100644 --- a/datafusion/physical-optimizer/src/pruning.rs +++ b/datafusion/physical-optimizer/src/pruning.rs @@ -1884,8 +1884,9 @@ mod tests { use std::ops::{Not, Rem}; use super::*; - use datafusion_common::assert_batches_eq; + use datafusion_common::test_util::batches_to_string; use datafusion_expr::{col, lit}; + use insta::assert_snapshot; use arrow::array::Decimal128Array; use arrow::{ @@ -2466,18 +2467,16 @@ mod tests { let batch = build_statistics_record_batch(&statistics, &required_columns).unwrap(); - let expected = [ - "+--------+--------+--------+--------+", - "| s1_min | s2_max | s3_max | s3_min |", - "+--------+--------+--------+--------+", - "| | 20 | q | a |", - "| | | | |", - "| 9 | | r | |", - "| | | | |", - "+--------+--------+--------+--------+", - ]; - - assert_batches_eq!(expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +--------+--------+--------+--------+ + | s1_min | s2_max | s3_max | s3_min | + +--------+--------+--------+--------+ + | | 20 | q | a | + | | | | | + | 9 | | r | | + | | | | | + +--------+--------+--------+--------+ + "); } #[test] @@ -2505,15 +2504,14 @@ mod tests { let batch = build_statistics_record_batch(&statistics, &required_columns).unwrap(); - let expected = [ - "+-------------------------------+", - "| s1_min |", - "+-------------------------------+", - "| 1970-01-01T00:00:00.000000010 |", - "+-------------------------------+", - ]; - assert_batches_eq!(expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +-------------------------------+ + | s1_min | + +-------------------------------+ + | 1970-01-01T00:00:00.000000010 | + +-------------------------------+ + "); } #[test] @@ -2551,15 +2549,13 @@ mod tests { let batch = build_statistics_record_batch(&statistics, &required_columns).unwrap(); - let expected = [ - "+--------+", - "| s1_min |", - "+--------+", - "| |", - "+--------+", - ]; - - assert_batches_eq!(expected, &[batch]); + assert_snapshot!(batches_to_string(&[batch]), @r" + +--------+ + | s1_min | + +--------+ + | | + +--------+ + "); } #[test]