8 changes: 2 additions & 6 deletions datafusion/datasource-avro/src/avro_to_arrow/reader.rs
@@ -64,13 +64,9 @@ impl ReaderBuilder {
/// let file = File::open("test/data/basic.avro").unwrap();
///
/// // create a builder, inferring the schema with the first 100 records
-/// let builder = ReaderBuilder::new()
-///     .read_schema()
-///     .with_batch_size(100);
+/// let builder = ReaderBuilder::new().read_schema().with_batch_size(100);
///
-/// let reader = builder
-///     .build::<File>(file)
-///     .unwrap();
+/// let reader = builder.build::<File>(file).unwrap();
///
/// reader
/// }
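For reference, the reformatted one-line calls assemble into a complete program along these lines. This is a minimal sketch: the import path and the assumption that the returned reader iterates over `Result<RecordBatch>` values come from the crate layout, not from this diff.

```rust
use std::fs::File;

// Assumed re-export path for the builder shown in the hunk above.
use datafusion_datasource_avro::avro_to_arrow::ReaderBuilder;

fn main() {
    // Path taken from the doc example above.
    let file = File::open("test/data/basic.avro").unwrap();

    // Create a builder, inferring the schema with the first 100 records.
    let builder = ReaderBuilder::new().read_schema().with_batch_size(100);
    let reader = builder.build::<File>(file).unwrap();

    // Assumption: the reader yields `Result<RecordBatch>` items.
    for batch in reader {
        println!("read {} rows", batch.unwrap().num_rows());
    }
}
```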
1 change: 0 additions & 1 deletion datafusion/datasource-parquet/src/page_filter.rs
@@ -90,7 +90,6 @@ use parquet::{
/// ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━┛
///
/// Total rows: 300
-///
/// ```
///
/// Given the predicate `A > 35 AND B = 'F'`:
2 changes: 0 additions & 2 deletions datafusion/datasource-parquet/src/source.rs
@@ -85,7 +85,6 @@ use parquet::encryption::decrypt::FileDecryptionProperties;
/// │.───────────────────.│
/// │ )
/// `───────────────────'
-///
/// ```
///
/// # Example: Create a `DataSourceExec`
@@ -349,7 +348,6 @@ impl ParquetSource {
}

/// Optional user defined parquet file reader factory.
-///
pub fn with_parquet_file_reader_factory(
mut self,
parquet_file_reader_factory: Arc<dyn ParquetFileReaderFactory>,
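The `mut self` receiver plus `-> Self` return in this hunk is the consuming-builder pattern; a self-contained sketch of the same shape, with all type names hypothetical stand-ins rather than the real DataFusion types, looks like this:

```rust
use std::sync::Arc;

// Stand-in for the real `ParquetFileReaderFactory` trait.
trait ParquetFileReaderFactory: Send + Sync {}

#[derive(Default)]
struct Source {
    factory: Option<Arc<dyn ParquetFileReaderFactory>>,
}

impl Source {
    /// Consuming setter mirroring `with_parquet_file_reader_factory`:
    /// take ownership, store the factory, return `Self` for chaining.
    fn with_parquet_file_reader_factory(
        mut self,
        factory: Arc<dyn ParquetFileReaderFactory>,
    ) -> Self {
        self.factory = Some(factory);
        self
    }
}

struct MyFactory;
impl ParquetFileReaderFactory for MyFactory {}

fn main() {
    let source = Source::default()
        .with_parquet_file_reader_factory(Arc::new(MyFactory));
    assert!(source.factory.is_some());
}
```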
68 changes: 34 additions & 34 deletions datafusion/datasource/src/file_scan_config.rs
@@ -1388,51 +1388,51 @@ fn create_output_array(
/// correctly sorted on `(A, B, C)`
///
/// ```text
-///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
-/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
-///┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃
-/// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │
-///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃
-/// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │
-///┃ │ │ ┃
-/// │ │ │ │ │ │
-///┃ │ │ ┃
-/// │ │ │ │ │ │
-///┃ │ │ ┃
-/// │ │ │ │ │ │
-///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
-/// DataFusion DataFusion DataFusion DataFusion
-///┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃
-/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
+/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐
+/// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃
+/// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │
+/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃
+/// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │
+/// ┃ │ │ ┃
+/// │ │ │ │ │ │
+/// ┃ │ │ ┃
+/// │ │ │ │ │ │
+/// ┃ │ │ ┃
+/// │ │ │ │ │ │
+/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
+/// DataFusion DataFusion DataFusion DataFusion
+/// ┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃
+/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
///
/// DataSourceExec
-///```
+/// ```
///
/// However, when more than 1 file is assigned to each partition, each
/// partition is NOT correctly sorted on `(A, B, C)`. Once the second
/// file is scanned, the same values for A, B and C can be repeated in
/// the same sorted stream.
///
///```text
-///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
-/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
-///┃ ┌───────────────┐ ┌──────────────┐ │
-/// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃
-///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
-/// │ └───────────────┘ │ │ └──────────────┘ ┃
-///┃ ┌───────────────┐ ┌──────────────┐ │
-/// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃
-///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
-/// │ └───────────────┘ │ │ └──────────────┘ ┃
-///┃ │
-/// │ │ │ ┃
-///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
-/// DataFusion DataFusion ┃
-///┃ Partition 1 Partition 2
-/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
+/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
+/// ┃ ┌───────────────┐ ┌──────────────┐ │
+/// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃
+/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
+/// │ └───────────────┘ │ │ └──────────────┘ ┃
+/// ┃ ┌───────────────┐ ┌──────────────┐ │
+/// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃
+/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │
+/// │ └───────────────┘ │ │ └──────────────┘ ┃
+/// ┃ │
+/// │ │ │ ┃
+/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
+/// DataFusion DataFusion ┃
+/// ┃ Partition 1 Partition 2
+/// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛
///
/// DataSourceExec
-///```
+/// ```
fn get_projected_output_ordering(
base_config: &FileScanConfig,
projected_schema: &SchemaRef,
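The property the diagrams illustrate, that one sorted file per partition preserves the sort while scanning two sorted files in one partition breaks it, can be checked in a few lines. This sketch is illustrative only and uses plain tuples in place of Parquet rows:

```rust
/// True if `v` is non-decreasing on the (A, B) key.
fn is_sorted(v: &[(u32, u32)]) -> bool {
    v.windows(2).all(|w| w[0] <= w[1])
}

fn main() {
    // Two files, each individually sorted on (A, B).
    let file1 = vec![(1, 10), (2, 20), (3, 30)];
    let file2 = vec![(1, 15), (2, 25)];

    // One file per partition: each partition's stream stays sorted.
    assert!(is_sorted(&file1) && is_sorted(&file2));

    // Both files scanned back to back in one partition: key values
    // repeat once the second file starts, so the stream is not sorted.
    let concatenated: Vec<_> = file1.iter().chain(&file2).copied().collect();
    assert!(!is_sorted(&concatenated));
}
```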
1 change: 0 additions & 1 deletion datafusion/datasource/src/mod.rs
@@ -310,7 +310,6 @@ pub async fn calculate_range(
/// Returns a `Result` wrapping a `usize` that represents the position of the first newline character found within the specified range. If no newline is found, it returns the length of the scanned data, effectively indicating the end of the range.
///
/// The function returns an `Error` if any issues arise while reading from the object store or processing the data stream.
-///
async fn find_first_newline(
object_store: &Arc<dyn ObjectStore>,
location: &Path,
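A minimal sketch of the contract quoted in this hunk, simplified to a plain byte slice instead of an `ObjectStore` stream (the function name here is illustrative):

```rust
/// Returns the offset of the first newline in `data`, or `data.len()`
/// if none is found, matching the documented behavior above.
fn first_newline_offset(data: &[u8]) -> usize {
    data.iter().position(|&b| b == b'\n').unwrap_or(data.len())
}

fn main() {
    assert_eq!(first_newline_offset(b"abc\ndef"), 3);
    assert_eq!(first_newline_offset(b"no newline"), 10);
}
```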
1 change: 0 additions & 1 deletion datafusion/datasource/src/url.rs
@@ -385,7 +385,6 @@ const GLOB_START_CHARS: [char; 3] = ['?', '*', '['];
///
/// Path delimiters are determined using [`std::path::is_separator`] which
/// permits `/` as a path delimiter even on Windows platforms.
-///
#[cfg(not(target_arch = "wasm32"))]
fn split_glob_expression(path: &str) -> Option<(&str, &str)> {
let mut last_separator = 0;
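Based on the constant and the doc text in this hunk, the split walks the path until the first glob metacharacter and cuts at the last separator seen before it. A sketch under those assumptions (not the exact DataFusion implementation):

```rust
const GLOB_START_CHARS: [char; 3] = ['?', '*', '['];

/// Split `path` into (literal prefix, glob suffix) at the last path
/// separator before the first glob character; `None` if there is no glob.
fn split_glob_expression(path: &str) -> Option<(&str, &str)> {
    let mut last_separator = 0;
    for (idx, c) in path.char_indices() {
        if GLOB_START_CHARS.contains(&c) {
            return Some((&path[..last_separator], &path[last_separator..]));
        }
        if std::path::is_separator(c) {
            last_separator = idx + c.len_utf8();
        }
    }
    None
}

fn main() {
    assert_eq!(
        split_glob_expression("data/year=*/file.parquet"),
        Some(("data/", "year=*/file.parquet"))
    );
    assert_eq!(split_glob_expression("data/file.parquet"), None);
}
```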
12 changes: 10 additions & 2 deletions datafusion/datasource/src/write/mod.rs
@@ -162,7 +162,11 @@ impl ObjectWriterBuilder {
/// # let object_store = Arc::new(InMemory::new());
/// let mut builder = ObjectWriterBuilder::new(compression_type, &location, object_store);
/// builder.set_buffer_size(Some(20 * 1024 * 1024)); //20 MiB
-/// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match");
+/// assert_eq!(
+///     builder.get_buffer_size(),
+///     Some(20 * 1024 * 1024),
+///     "Internal error: Builder buffer size doesn't match"
+/// );
/// ```
pub fn set_buffer_size(&mut self, buffer_size: Option<usize>) {
self.buffer_size = buffer_size;
@@ -182,7 +186,11 @@
/// # let object_store = Arc::new(InMemory::new());
/// let builder = ObjectWriterBuilder::new(compression_type, &location, object_store)
/// .with_buffer_size(Some(20 * 1024 * 1024)); //20 MiB
-/// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match");
+/// assert_eq!(
+///     builder.get_buffer_size(),
+///     Some(20 * 1024 * 1024),
+///     "Internal error: Builder buffer size doesn't match"
+/// );
/// ```
pub fn with_buffer_size(mut self, buffer_size: Option<usize>) -> Self {
self.buffer_size = buffer_size;