From 1d3aad46cd708d0bdf805804d296a7875d7936e6 Mon Sep 17 00:00:00 2001 From: irenjj Date: Thu, 13 Mar 2025 22:11:03 +0800 Subject: [PATCH 1/3] Implement `tree` explain for `ArrowFileSink` --- .../core/src/datasource/file_format/arrow.rs | 8 +- datafusion/datasource-csv/src/file_format.rs | 6 +- datafusion/datasource-json/src/file_format.rs | 6 +- datafusion/datasource/src/display.rs | 115 +++++++++++++++++- .../sqllogictest/test_files/explain_tree.slt | 59 ++++++--- 5 files changed, 166 insertions(+), 28 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 6835d9a6da2a..d7499157eadf 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -47,7 +47,7 @@ use datafusion_common::{ not_impl_err, DataFusionError, GetExt, Statistics, DEFAULT_ARROW_EXTENSION, }; use datafusion_common_runtime::SpawnedTask; -use datafusion_datasource::display::FileGroupDisplay; +use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; @@ -302,8 +302,10 @@ impl DisplayAs for ArrowFileSink { write!(f, ")") } DisplayFormatType::TreeRender => { - // TODO: collect info - write!(f, "") + if !self.config.table_paths.is_empty() { + TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; + } + Ok(()) } } } diff --git a/datafusion/datasource-csv/src/file_format.rs b/datafusion/datasource-csv/src/file_format.rs index cc1b63cfc9b4..20769f933120 100644 --- a/datafusion/datasource-csv/src/file_format.rs +++ b/datafusion/datasource-csv/src/file_format.rs @@ -35,7 +35,7 @@ use datafusion_common::{ }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::decoder::Decoder; -use datafusion_datasource::display::FileGroupDisplay; +use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ @@ -666,8 +666,8 @@ impl DisplayAs for CsvSink { write!(f, ")") } DisplayFormatType::TreeRender => { - if !self.config.file_groups.is_empty() { - FileGroupDisplay(&self.config.file_groups).fmt_as(t, f)?; + if !self.config.table_paths.is_empty() { + TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; } Ok(()) } diff --git a/datafusion/datasource-json/src/file_format.rs b/datafusion/datasource-json/src/file_format.rs index bec3a524f657..673937a3b109 100644 --- a/datafusion/datasource-json/src/file_format.rs +++ b/datafusion/datasource-json/src/file_format.rs @@ -37,7 +37,7 @@ use datafusion_common::{ }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::decoder::Decoder; -use datafusion_datasource::display::FileGroupDisplay; +use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ @@ -325,8 +325,8 @@ impl DisplayAs for JsonSink { write!(f, ")") } DisplayFormatType::TreeRender => { - if !self.config.file_groups.is_empty() { - FileGroupDisplay(&self.config.file_groups).fmt_as(t, f)?; + if !self.config.table_paths.is_empty() { + TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; } Ok(()) } diff --git a/datafusion/datasource/src/display.rs b/datafusion/datasource/src/display.rs index 7ab8d407be52..d1fd3a6b278a 100644 --- a/datafusion/datasource/src/display.rs +++ b/datafusion/datasource/src/display.rs @@ -19,7 +19,7 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType}; use std::fmt::{Debug, Formatter, Result as FmtResult}; -use crate::PartitionedFile; +use crate::{ListingTableUrl, PartitionedFile}; /// A wrapper to customize partitioned file display /// @@ -91,6 +91,36 @@ impl DisplayAs for FileGroupDisplay<'_> { } } +/// A wrapper to customize table paths display +/// +/// Prints in the format: +/// ```text +/// table_path1, table_path2, ... +/// ``` +#[derive(Debug)] +pub struct TablePathsDisplay<'a>(pub &'a [ListingTableUrl]); + +impl DisplayAs for TablePathsDisplay<'_> { + fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { + match t { + DisplayFormatType::Default | DisplayFormatType::TreeRender => { + // To avoid showing too many paths + let max_paths = 5; + fmt_up_to_n_elements(self.0, max_paths, f, |path, f| { + write!(f, "{}", path.as_str()) + })?; + } + DisplayFormatType::Verbose => { + fmt_elements_split_by_commas(self.0.iter(), f, |path, f| { + write!(f, "{}", path.as_str()) + })?; + } + } + + Ok(()) + } +} + /// helper to format an array of up to N elements fn fmt_up_to_n_elements( elements: &[E], @@ -139,6 +169,7 @@ mod tests { use object_store::{path::Path, ObjectMeta}; use chrono::Utc; + use url::Url; #[test] fn file_groups_display_empty() { @@ -292,4 +323,86 @@ mod tests { metadata_size_hint: None, } } + + #[test] + fn table_paths_display_empty() { + let expected = ""; + assert_eq!(DefaultDisplay(TablePathsDisplay(&[])).to_string(), expected); + } + + #[test] + fn table_paths_display_one() { + let url = Url::parse("file:///foo/bar.csv").unwrap(); + let table_path = ListingTableUrl::parse(url).unwrap(); + let paths = [table_path]; + + let expected = "file:///foo/bar.csv"; + assert_eq!( + DefaultDisplay(TablePathsDisplay(&paths)).to_string(), + expected + ); + } + + #[test] + fn table_paths_display_many_default() { + let urls = vec![ + Url::parse("file:///foo/a.csv").unwrap(), + Url::parse("file:///foo/b.csv").unwrap(), + Url::parse("file:///foo/c.csv").unwrap(), + ]; + let paths: Vec<_> = urls + .into_iter() + .map(|url| ListingTableUrl::parse(url).unwrap()) + .collect(); + + let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv"; + assert_eq!( + DefaultDisplay(TablePathsDisplay(&paths)).to_string(), + expected + ); + } + + #[test] + fn table_paths_display_too_many_default() { + let urls = vec![ + Url::parse("file:///foo/a.csv").unwrap(), + Url::parse("file:///foo/b.csv").unwrap(), + Url::parse("file:///foo/c.csv").unwrap(), + Url::parse("file:///foo/d.csv").unwrap(), + Url::parse("file:///foo/e.csv").unwrap(), + Url::parse("file:///foo/f.csv").unwrap(), + ]; + let paths: Vec<_> = urls + .into_iter() + .map(|url| ListingTableUrl::parse(url).unwrap()) + .collect(); + + let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv, file:///foo/d.csv, file:///foo/e.csv, ..."; + assert_eq!( + DefaultDisplay(TablePathsDisplay(&paths)).to_string(), + expected + ); + } + + #[test] + fn table_paths_display_too_many_verbose() { + let urls = vec![ + Url::parse("file:///foo/a.csv").unwrap(), + Url::parse("file:///foo/b.csv").unwrap(), + Url::parse("file:///foo/c.csv").unwrap(), + Url::parse("file:///foo/d.csv").unwrap(), + Url::parse("file:///foo/e.csv").unwrap(), + Url::parse("file:///foo/f.csv").unwrap(), + ]; + let paths: Vec<_> = urls + .into_iter() + .map(|url| ListingTableUrl::parse(url).unwrap()) + .collect(); + + let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv, file:///foo/d.csv, file:///foo/e.csv, file:///foo/f.csv"; + assert_eq!( + VerboseDisplay(TablePathsDisplay(&paths)).to_string(), + expected + ); + } } diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 9e047133fcfc..0397e0b06c52 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1711,35 +1711,58 @@ physical_plan query TT explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) -TO 'test_files/scratch/explain_tree/1.json'; +TO '/tmp/1.json'; ---- physical_plan 01)┌───────────────────────────┐ 02)│ DataSinkExec │ -03)└─────────────┬─────────────┘ -04)┌─────────────┴─────────────┐ -05)│ DataSourceExec │ -06)│ -------------------- │ -07)│ bytes: 2672 │ -08)│ format: memory │ -09)│ rows: 1 │ -10)└───────────────────────────┘ +03)│ -------------------- │ +04)│ file:///tmp/1.json │ +05)└─────────────┬─────────────┘ +06)┌─────────────┴─────────────┐ +07)│ DataSourceExec │ +08)│ -------------------- │ +09)│ bytes: 2672 │ +10)│ format: memory │ +11)│ rows: 1 │ +12)└───────────────────────────┘ query TT explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) -TO 'test_files/scratch/explain_tree/2.csv'; +TO '/tmp/2.csv'; ---- physical_plan 01)┌───────────────────────────┐ 02)│ DataSinkExec │ -03)└─────────────┬─────────────┘ -04)┌─────────────┴─────────────┐ -05)│ DataSourceExec │ -06)│ -------------------- │ -07)│ bytes: 2672 │ -08)│ format: memory │ -09)│ rows: 1 │ -10)└───────────────────────────┘ +03)│ -------------------- │ +04)│ file:///tmp/2.csv │ +05)└─────────────┬─────────────┘ +06)┌─────────────┴─────────────┐ +07)│ DataSourceExec │ +08)│ -------------------- │ +09)│ bytes: 2672 │ +10)│ format: memory │ +11)│ rows: 1 │ +12)└───────────────────────────┘ + +query TT +explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) +TO '/tmp/3.arrow'; +---- +physical_plan +01)┌───────────────────────────┐ +02)│ DataSinkExec │ +03)│ -------------------- │ +04)│ file:///tmp/3.arrow │ +05)└─────────────┬─────────────┘ +06)┌─────────────┴─────────────┐ +07)│ DataSourceExec │ +08)│ -------------------- │ +09)│ bytes: 2672 │ +10)│ format: memory │ +11)│ rows: 1 │ +12)└───────────────────────────┘ + # Test explain tree rendering for CoalesceBatchesExec with limit statement ok From e0134502f2fe6bc73277e15fdd956beb09664f48 Mon Sep 17 00:00:00 2001 From: irenjj Date: Fri, 14 Mar 2025 17:56:35 +0800 Subject: [PATCH 2/3] fmt original_url --- .../core/src/datasource/file_format/arrow.rs | 8 +- .../core/src/datasource/listing/table.rs | 1 + datafusion/core/src/physical_planner.rs | 2 + datafusion/datasource-csv/src/file_format.rs | 8 +- datafusion/datasource-json/src/file_format.rs | 8 +- datafusion/datasource/src/display.rs | 115 +----------------- datafusion/datasource/src/file_sink_config.rs | 2 + .../proto/src/physical_plan/from_proto.rs | 1 + .../sqllogictest/test_files/explain_tree.slt | 75 +++++++----- 9 files changed, 61 insertions(+), 159 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index d7499157eadf..71d8d0026674 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -47,7 +47,7 @@ use datafusion_common::{ not_impl_err, DataFusionError, GetExt, Statistics, DEFAULT_ARROW_EXTENSION, }; use datafusion_common_runtime::SpawnedTask; -use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; +use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; @@ -302,10 +302,8 @@ impl DisplayAs for ArrowFileSink { write!(f, ")") } DisplayFormatType::TreeRender => { - if !self.config.table_paths.is_empty() { - TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; - } - Ok(()) + writeln!(f, "format: arrow")?; + write!(f, "file={}", &self.config.original_url) } } } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4d7762784d78..21b35bac2174 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -1037,6 +1037,7 @@ impl TableProvider for ListingTable { // Sink related option, apart from format let config = FileSinkConfig { + original_url: String::default(), object_store_url: self.table_paths()[0].object_store(), table_paths: self.table_paths().clone(), file_groups, diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 6aff9280ffad..170f85af7a89 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -502,6 +502,7 @@ impl DefaultPhysicalPlanner { partition_by, options: source_option_tuples, }) => { + let original_url = output_url.clone(); let input_exec = children.one()?; let parsed_url = ListingTableUrl::parse(output_url)?; let object_store_url = parsed_url.object_store(); @@ -531,6 +532,7 @@ impl DefaultPhysicalPlanner { // Set file sink related options let config = FileSinkConfig { + original_url, object_store_url, table_paths: vec![parsed_url], file_groups: vec![], diff --git a/datafusion/datasource-csv/src/file_format.rs b/datafusion/datasource-csv/src/file_format.rs index 20769f933120..522cb12db0c7 100644 --- a/datafusion/datasource-csv/src/file_format.rs +++ b/datafusion/datasource-csv/src/file_format.rs @@ -35,7 +35,7 @@ use datafusion_common::{ }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::decoder::Decoder; -use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; +use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ @@ -666,10 +666,8 @@ impl DisplayAs for CsvSink { write!(f, ")") } DisplayFormatType::TreeRender => { - if !self.config.table_paths.is_empty() { - TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; - } - Ok(()) + writeln!(f, "format: csv")?; + write!(f, "file={}", &self.config.original_url) } } } diff --git a/datafusion/datasource-json/src/file_format.rs b/datafusion/datasource-json/src/file_format.rs index 673937a3b109..9b6d5925fe81 100644 --- a/datafusion/datasource-json/src/file_format.rs +++ b/datafusion/datasource-json/src/file_format.rs @@ -37,7 +37,7 @@ use datafusion_common::{ }; use datafusion_common_runtime::SpawnedTask; use datafusion_datasource::decoder::Decoder; -use datafusion_datasource::display::{FileGroupDisplay, TablePathsDisplay}; +use datafusion_datasource::display::FileGroupDisplay; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_compression_type::FileCompressionType; use datafusion_datasource::file_format::{ @@ -325,10 +325,8 @@ impl DisplayAs for JsonSink { write!(f, ")") } DisplayFormatType::TreeRender => { - if !self.config.table_paths.is_empty() { - TablePathsDisplay(&self.config.table_paths).fmt_as(t, f)?; - } - Ok(()) + writeln!(f, "format: json")?; + write!(f, "file={}", &self.config.original_url) } } } diff --git a/datafusion/datasource/src/display.rs b/datafusion/datasource/src/display.rs index d1fd3a6b278a..7ab8d407be52 100644 --- a/datafusion/datasource/src/display.rs +++ b/datafusion/datasource/src/display.rs @@ -19,7 +19,7 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType}; use std::fmt::{Debug, Formatter, Result as FmtResult}; -use crate::{ListingTableUrl, PartitionedFile}; +use crate::PartitionedFile; /// A wrapper to customize partitioned file display /// @@ -91,36 +91,6 @@ impl DisplayAs for FileGroupDisplay<'_> { } } -/// A wrapper to customize table paths display -/// -/// Prints in the format: -/// ```text -/// table_path1, table_path2, ... -/// ``` -#[derive(Debug)] -pub struct TablePathsDisplay<'a>(pub &'a [ListingTableUrl]); - -impl DisplayAs for TablePathsDisplay<'_> { - fn fmt_as(&self, t: DisplayFormatType, f: &mut Formatter) -> FmtResult { - match t { - DisplayFormatType::Default | DisplayFormatType::TreeRender => { - // To avoid showing too many paths - let max_paths = 5; - fmt_up_to_n_elements(self.0, max_paths, f, |path, f| { - write!(f, "{}", path.as_str()) - })?; - } - DisplayFormatType::Verbose => { - fmt_elements_split_by_commas(self.0.iter(), f, |path, f| { - write!(f, "{}", path.as_str()) - })?; - } - } - - Ok(()) - } -} - /// helper to format an array of up to N elements fn fmt_up_to_n_elements( elements: &[E], @@ -169,7 +139,6 @@ mod tests { use object_store::{path::Path, ObjectMeta}; use chrono::Utc; - use url::Url; #[test] fn file_groups_display_empty() { @@ -323,86 +292,4 @@ mod tests { metadata_size_hint: None, } } - - #[test] - fn table_paths_display_empty() { - let expected = ""; - assert_eq!(DefaultDisplay(TablePathsDisplay(&[])).to_string(), expected); - } - - #[test] - fn table_paths_display_one() { - let url = Url::parse("file:///foo/bar.csv").unwrap(); - let table_path = ListingTableUrl::parse(url).unwrap(); - let paths = [table_path]; - - let expected = "file:///foo/bar.csv"; - assert_eq!( - DefaultDisplay(TablePathsDisplay(&paths)).to_string(), - expected - ); - } - - #[test] - fn table_paths_display_many_default() { - let urls = vec![ - Url::parse("file:///foo/a.csv").unwrap(), - Url::parse("file:///foo/b.csv").unwrap(), - Url::parse("file:///foo/c.csv").unwrap(), - ]; - let paths: Vec<_> = urls - .into_iter() - .map(|url| ListingTableUrl::parse(url).unwrap()) - .collect(); - - let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv"; - assert_eq!( - DefaultDisplay(TablePathsDisplay(&paths)).to_string(), - expected - ); - } - - #[test] - fn table_paths_display_too_many_default() { - let urls = vec![ - Url::parse("file:///foo/a.csv").unwrap(), - Url::parse("file:///foo/b.csv").unwrap(), - Url::parse("file:///foo/c.csv").unwrap(), - Url::parse("file:///foo/d.csv").unwrap(), - Url::parse("file:///foo/e.csv").unwrap(), - Url::parse("file:///foo/f.csv").unwrap(), - ]; - let paths: Vec<_> = urls - .into_iter() - .map(|url| ListingTableUrl::parse(url).unwrap()) - .collect(); - - let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv, file:///foo/d.csv, file:///foo/e.csv, ..."; - assert_eq!( - DefaultDisplay(TablePathsDisplay(&paths)).to_string(), - expected - ); - } - - #[test] - fn table_paths_display_too_many_verbose() { - let urls = vec![ - Url::parse("file:///foo/a.csv").unwrap(), - Url::parse("file:///foo/b.csv").unwrap(), - Url::parse("file:///foo/c.csv").unwrap(), - Url::parse("file:///foo/d.csv").unwrap(), - Url::parse("file:///foo/e.csv").unwrap(), - Url::parse("file:///foo/f.csv").unwrap(), - ]; - let paths: Vec<_> = urls - .into_iter() - .map(|url| ListingTableUrl::parse(url).unwrap()) - .collect(); - - let expected = "file:///foo/a.csv, file:///foo/b.csv, file:///foo/c.csv, file:///foo/d.csv, file:///foo/e.csv, file:///foo/f.csv"; - assert_eq!( - VerboseDisplay(TablePathsDisplay(&paths)).to_string(), - expected - ); - } } diff --git a/datafusion/datasource/src/file_sink_config.rs b/datafusion/datasource/src/file_sink_config.rs index 6087f930d3fe..279c9d2100ec 100644 --- a/datafusion/datasource/src/file_sink_config.rs +++ b/datafusion/datasource/src/file_sink_config.rs @@ -86,6 +86,8 @@ pub trait FileSink: DataSink { /// The base configurations to provide when creating a physical plan for /// writing to any given file format. pub struct FileSinkConfig { + /// The unresolved URL specified by the user + pub original_url: String, /// Object store URL, used to get an ObjectStore instance pub object_store_url: ObjectStoreUrl, /// A vector of [`PartitionedFile`] structs, each representing a file partition diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 6331b7fb3114..0bf9fdb63d59 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -657,6 +657,7 @@ impl TryFrom<&protobuf::FileSinkConfig> for FileSinkConfig { protobuf::InsertOp::Replace => InsertOp::Replace, }; Ok(Self { + original_url: String::default(), object_store_url: ObjectStoreUrl::parse(&conf.object_store_url)?, file_groups, table_paths, diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt b/datafusion/sqllogictest/test_files/explain_tree.slt index 0397e0b06c52..dbb24ac2f16d 100644 --- a/datafusion/sqllogictest/test_files/explain_tree.slt +++ b/datafusion/sqllogictest/test_files/explain_tree.slt @@ -1711,57 +1711,72 @@ physical_plan query TT explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) -TO '/tmp/1.json'; +TO 'test_files/scratch/explain_tree/1.json'; ---- physical_plan 01)┌───────────────────────────┐ 02)│ DataSinkExec │ 03)│ -------------------- │ -04)│ file:///tmp/1.json │ -05)└─────────────┬─────────────┘ -06)┌─────────────┴─────────────┐ -07)│ DataSourceExec │ -08)│ -------------------- │ -09)│ bytes: 2672 │ -10)│ format: memory │ -11)│ rows: 1 │ -12)└───────────────────────────┘ +04)│ file: │ +05)│ test_files/scratch │ +06)│ /explain_tree/1 │ +07)│ .json │ +08)│ │ +09)│ format: json │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ DataSourceExec │ +13)│ -------------------- │ +14)│ bytes: 2672 │ +15)│ format: memory │ +16)│ rows: 1 │ +17)└───────────────────────────┘ query TT explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) -TO '/tmp/2.csv'; +TO 'test_files/scratch/explain_tree/2.csv'; ---- physical_plan 01)┌───────────────────────────┐ 02)│ DataSinkExec │ 03)│ -------------------- │ -04)│ file:///tmp/2.csv │ -05)└─────────────┬─────────────┘ -06)┌─────────────┴─────────────┐ -07)│ DataSourceExec │ -08)│ -------------------- │ -09)│ bytes: 2672 │ -10)│ format: memory │ -11)│ rows: 1 │ -12)└───────────────────────────┘ +04)│ file: │ +05)│ test_files/scratch │ +06)│ /explain_tree/2 │ +07)│ .csv │ +08)│ │ +09)│ format: csv │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ DataSourceExec │ +13)│ -------------------- │ +14)│ bytes: 2672 │ +15)│ format: memory │ +16)│ rows: 1 │ +17)└───────────────────────────┘ query TT explain COPY (VALUES (1, 'foo', 1, '2023-01-01'), (2, 'bar', 2, '2023-01-02'), (3, 'baz', 3, '2023-01-03')) -TO '/tmp/3.arrow'; +TO 'test_files/scratch/explain_tree/3.arrow'; ---- physical_plan 01)┌───────────────────────────┐ 02)│ DataSinkExec │ 03)│ -------------------- │ -04)│ file:///tmp/3.arrow │ -05)└─────────────┬─────────────┘ -06)┌─────────────┴─────────────┐ -07)│ DataSourceExec │ -08)│ -------------------- │ -09)│ bytes: 2672 │ -10)│ format: memory │ -11)│ rows: 1 │ -12)└───────────────────────────┘ +04)│ file: │ +05)│ test_files/scratch │ +06)│ /explain_tree/3 │ +07)│ .arrow │ +08)│ │ +09)│ format: arrow │ +10)└─────────────┬─────────────┘ +11)┌─────────────┴─────────────┐ +12)│ DataSourceExec │ +13)│ -------------------- │ +14)│ bytes: 2672 │ +15)│ format: memory │ +16)│ rows: 1 │ +17)└───────────────────────────┘ # Test explain tree rendering for CoalesceBatchesExec with limit From 30ecfa27c54df8ee08cdcbc559fcdf8e6f5ad98a Mon Sep 17 00:00:00 2001 From: irenjj Date: Fri, 14 Mar 2025 21:01:41 +0800 Subject: [PATCH 3/3] fix doc test --- datafusion/core/src/datasource/file_format/parquet.rs | 3 +++ datafusion/proto/tests/cases/roundtrip_physical_plan.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index e2c7d1ecafa3..3b71593b3334 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -1373,6 +1373,7 @@ mod tests { let object_store_url = ObjectStoreUrl::local_filesystem(); let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: object_store_url.clone(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse(table_path)?], @@ -1458,6 +1459,7 @@ mod tests { // set file config to include partitioning on field_a let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: object_store_url.clone(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse("file:///")?], @@ -1541,6 +1543,7 @@ mod tests { let object_store_url = ObjectStoreUrl::local_filesystem(); let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: object_store_url.clone(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse("file:///")?], diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index b5bfef99a6f3..21622390c6d0 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -1281,6 +1281,7 @@ fn roundtrip_json_sink() -> Result<()> { let input = Arc::new(PlaceholderRowExec::new(schema.clone())); let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: ObjectStoreUrl::local_filesystem(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse("file:///")?], @@ -1317,6 +1318,7 @@ fn roundtrip_csv_sink() -> Result<()> { let input = Arc::new(PlaceholderRowExec::new(schema.clone())); let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: ObjectStoreUrl::local_filesystem(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse("file:///")?], @@ -1372,6 +1374,7 @@ fn roundtrip_parquet_sink() -> Result<()> { let input = Arc::new(PlaceholderRowExec::new(schema.clone())); let file_sink_config = FileSinkConfig { + original_url: String::default(), object_store_url: ObjectStoreUrl::local_filesystem(), file_groups: vec![PartitionedFile::new("/tmp".to_string(), 1)], table_paths: vec![ListingTableUrl::parse("file:///")?],