diff --git a/datafusion/datasource-avro/src/avro_to_arrow/reader.rs b/datafusion/datasource-avro/src/avro_to_arrow/reader.rs index 9a4d13fc191d..5ef35e2bee89 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/reader.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/reader.rs @@ -64,13 +64,9 @@ impl ReaderBuilder { /// let file = File::open("test/data/basic.avro").unwrap(); /// /// // create a builder, inferring the schema with the first 100 records - /// let builder = ReaderBuilder::new() - /// .read_schema() - /// .with_batch_size(100); + /// let builder = ReaderBuilder::new().read_schema().with_batch_size(100); /// - /// let reader = builder - /// .build::(file) - /// .unwrap(); + /// let reader = builder.build::(file).unwrap(); /// /// reader /// } diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs index 65d1affb44a9..82deedd406ce 100644 --- a/datafusion/datasource-parquet/src/page_filter.rs +++ b/datafusion/datasource-parquet/src/page_filter.rs @@ -90,7 +90,6 @@ use parquet::{ /// ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━┛ /// /// Total rows: 300 -/// /// ``` /// /// Given the predicate `A > 35 AND B = 'F'`: diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index 186d922fc373..b7c29f615a19 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -85,7 +85,6 @@ use parquet::encryption::decrypt::FileDecryptionProperties; /// │.───────────────────.│ /// │ ) /// `───────────────────' -/// /// ``` /// /// # Example: Create a `DataSourceExec` @@ -349,7 +348,6 @@ impl ParquetSource { } /// Optional user defined parquet file reader factory. - /// pub fn with_parquet_file_reader_factory( mut self, parquet_file_reader_factory: Arc, diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index c52397d9a7cc..072922eb8920 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -1388,25 +1388,25 @@ fn create_output_array( /// correctly sorted on `(A, B, C)` /// /// ```text -///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓ -/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐ -///┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃ -/// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃ -/// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │ -///┃ │ │ ┃ -/// │ │ │ │ │ │ -///┃ │ │ ┃ -/// │ │ │ │ │ │ -///┃ │ │ ┃ -/// │ │ │ │ │ │ -///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ -/// DataFusion DataFusion DataFusion DataFusion -///┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃ -/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ +/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓ +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃ +/// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃ +/// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │ +/// ┃ │ │ ┃ +/// │ │ │ │ │ │ +/// ┃ │ │ ┃ +/// │ │ │ │ │ │ +/// ┃ │ │ ┃ +/// │ │ │ │ │ │ +/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ +/// DataFusion DataFusion DataFusion DataFusion +/// ┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃ +/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ /// /// DataSourceExec -///``` +/// ``` /// /// However, when more than 1 file is assigned to each partition, each /// partition is NOT correctly sorted on `(A, B, C)`. Once the second @@ -1414,25 +1414,25 @@ fn create_output_array( /// the same sorted stream /// ///```text -///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ -/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ -///┃ ┌───────────────┐ ┌──────────────┐ │ -/// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ -/// │ └───────────────┘ │ │ └──────────────┘ ┃ -///┃ ┌───────────────┐ ┌──────────────┐ │ -/// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ -/// │ └───────────────┘ │ │ └──────────────┘ ┃ -///┃ │ -/// │ │ │ ┃ -///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ -/// DataFusion DataFusion ┃ -///┃ Partition 1 Partition 2 -/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛ +/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ +/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ +/// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ +/// │ └───────────────┘ │ │ └──────────────┘ ┃ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ +/// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ +/// │ └───────────────┘ │ │ └──────────────┘ ┃ +/// ┃ │ +/// │ │ │ ┃ +/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ +/// DataFusion DataFusion ┃ +/// ┃ Partition 1 Partition 2 +/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛ /// /// DataSourceExec -///``` +/// ``` fn get_projected_output_ordering( base_config: &FileScanConfig, projected_schema: &SchemaRef, diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 80b44ad5949a..8d988bdb31be 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -310,7 +310,6 @@ pub async fn calculate_range( /// Returns a `Result` wrapping a `usize` that represents the position of the first newline character found within the specified range. If no newline is found, it returns the length of the scanned data, effectively indicating the end of the range. /// /// The function returns an `Error` if any issues arise while reading from the object store or processing the data stream. -/// async fn find_first_newline( object_store: &Arc, location: &Path, diff --git a/datafusion/datasource/src/url.rs b/datafusion/datasource/src/url.rs index 0f31eb7caf41..08e5b6a5df83 100644 --- a/datafusion/datasource/src/url.rs +++ b/datafusion/datasource/src/url.rs @@ -385,7 +385,6 @@ const GLOB_START_CHARS: [char; 3] = ['?', '*', '[']; /// /// Path delimiters are determined using [`std::path::is_separator`] which /// permits `/` as a path delimiter even on Windows platforms. -/// #[cfg(not(target_arch = "wasm32"))] fn split_glob_expression(path: &str) -> Option<(&str, &str)> { let mut last_separator = 0; diff --git a/datafusion/datasource/src/write/mod.rs b/datafusion/datasource/src/write/mod.rs index 3694568682a5..85832f81bc18 100644 --- a/datafusion/datasource/src/write/mod.rs +++ b/datafusion/datasource/src/write/mod.rs @@ -162,7 +162,11 @@ impl ObjectWriterBuilder { /// # let object_store = Arc::new(InMemory::new()); /// let mut builder = ObjectWriterBuilder::new(compression_type, &location, object_store); /// builder.set_buffer_size(Some(20 * 1024 * 1024)); //20 MiB - /// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match"); + /// assert_eq!( + /// builder.get_buffer_size(), + /// Some(20 * 1024 * 1024), + /// "Internal error: Builder buffer size doesn't match" + /// ); /// ``` pub fn set_buffer_size(&mut self, buffer_size: Option) { self.buffer_size = buffer_size; @@ -182,7 +186,11 @@ impl ObjectWriterBuilder { /// # let object_store = Arc::new(InMemory::new()); /// let builder = ObjectWriterBuilder::new(compression_type, &location, object_store) /// .with_buffer_size(Some(20 * 1024 * 1024)); //20 MiB - /// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match"); + /// assert_eq!( + /// builder.get_buffer_size(), + /// Some(20 * 1024 * 1024), + /// "Internal error: Builder buffer size doesn't match" + /// ); /// ``` pub fn with_buffer_size(mut self, buffer_size: Option) -> Self { self.buffer_size = buffer_size;