Skip to content

Commit b3c4f53

Browse files
authored
Fix doctests of parquet push decoded without default features (#8577)
# Which issue does this PR close?

None.

# Rationale for this change

Make sure `cargo test -p parquet --doc --no-default-features` works.

# What changes are included in this PR?

Feature gate some doc tests on the `arrow` feature in the parquet crate.

# Are these changes tested?

`cargo test -p parquet --doc --no-default-features`

# Are there any user-facing changes?

No
1 parent 348ae91 commit b3c4f53

File tree

1 file changed

+141
-128
lines changed

1 file changed

+141
-128
lines changed

parquet/src/file/metadata/push_decoder.rs

Lines changed: 141 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -47,53 +47,58 @@ use std::ops::Range;
4747
/// requires the most IO operations - one to read the footer and then one
4848
/// to read the metadata, and possibly more if page indexes are requested.
4949
///
50-
/// ```rust
51-
/// # use std::ops::Range;
52-
/// # use bytes::Bytes;
53-
/// # use arrow_array::record_batch;
54-
/// # use parquet::DecodeResult;
55-
/// # use parquet::arrow::ArrowWriter;
56-
/// # use parquet::errors::ParquetError;
57-
/// # use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
58-
/// #
59-
/// # fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
60-
/// # let file_bytes = {
61-
/// # let mut buffer = vec![0];
62-
/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
63-
/// # let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
64-
/// # writer.write(&batch).unwrap();
65-
/// # writer.close().unwrap();
66-
/// # Bytes::from(buffer)
67-
/// # };
68-
/// # // mimic IO by returning a function that returns the bytes for a given range
69-
/// # let get_range = |range: &Range<u64>| -> Bytes {
70-
/// # let start = range.start as usize;
71-
/// # let end = range.end as usize;
72-
/// # file_bytes.slice(start..end)
73-
/// # };
74-
/// #
75-
/// # let file_len = file_bytes.len() as u64;
76-
/// // The `ParquetMetaDataPushDecoder` needs to know the file length.
77-
/// let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
78-
/// // try to decode the metadata. If more data is needed, the decoder will tell you what ranges
79-
/// loop {
80-
/// match decoder.try_decode() {
81-
/// Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
82-
/// Ok(DecodeResult::NeedsData(ranges)) => {
83-
/// // The decoder needs more data
84-
/// //
85-
/// // In this example, we call a function that returns the bytes for each given range.
86-
/// // In a real application, you would likely read the data from a file or network.
87-
/// let data = ranges.iter().map(|range| get_range(range)).collect();
88-
/// // Push the data into the decoder and try to decode again on the next iteration.
89-
/// decoder.push_ranges(ranges, data).unwrap();
90-
/// }
91-
/// Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
92-
/// Err(e) => return Err(e),
93-
/// }
94-
/// }
95-
/// # }
96-
/// ```
50+
#[cfg_attr(
51+
feature = "arrow",
52+
doc = r##"
53+
```rust
54+
# use std::ops::Range;
55+
# use bytes::Bytes;
56+
# use arrow_array::record_batch;
57+
# use parquet::DecodeResult;
58+
# use parquet::arrow::ArrowWriter;
59+
# use parquet::errors::ParquetError;
60+
# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
61+
#
62+
# fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
63+
# let file_bytes = {
64+
# let mut buffer = vec![0];
65+
# let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
66+
# let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
67+
# writer.write(&batch).unwrap();
68+
# writer.close().unwrap();
69+
# Bytes::from(buffer)
70+
# };
71+
# // mimic IO by returning a function that returns the bytes for a given range
72+
# let get_range = |range: &Range<u64>| -> Bytes {
73+
# let start = range.start as usize;
74+
# let end = range.end as usize;
75+
# file_bytes.slice(start..end)
76+
# };
77+
#
78+
# let file_len = file_bytes.len() as u64;
79+
// The `ParquetMetaDataPushDecoder` needs to know the file length.
80+
let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
81+
// try to decode the metadata. If more data is needed, the decoder will tell you what ranges
82+
loop {
83+
match decoder.try_decode() {
84+
Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
85+
Ok(DecodeResult::NeedsData(ranges)) => {
86+
// The decoder needs more data
87+
//
88+
// In this example, we call a function that returns the bytes for each given range.
89+
// In a real application, you would likely read the data from a file or network.
90+
let data = ranges.iter().map(|range| get_range(range)).collect();
91+
// Push the data into the decoder and try to decode again on the next iteration.
92+
decoder.push_ranges(ranges, data).unwrap();
93+
}
94+
Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
95+
Err(e) => return Err(e),
96+
}
97+
}
98+
# }
99+
```
100+
"##
101+
)]
97102
///
98103
/// # Example with "prefetching"
99104
///
@@ -114,44 +119,48 @@ use std::ops::Range;
114119
///
115120
/// This approach can also be used when you have the entire file already in memory
116121
/// for other reasons.
117-
///
118-
/// ```rust
119-
/// # use std::ops::Range;
120-
/// # use bytes::Bytes;
121-
/// # use arrow_array::record_batch;
122-
/// # use parquet::DecodeResult;
123-
/// # use parquet::arrow::ArrowWriter;
124-
/// # use parquet::errors::ParquetError;
125-
/// # use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
126-
/// #
127-
/// # fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
128-
/// # let file_bytes = {
129-
/// # let mut buffer = vec![0];
130-
/// # let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
131-
/// # let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
132-
/// # writer.write(&batch).unwrap();
133-
/// # writer.close().unwrap();
134-
/// # Bytes::from(buffer)
135-
/// # };
136-
/// #
137-
/// let file_len = file_bytes.len() as u64;
138-
/// // For this example, we "prefetch" all the bytes which we have in memory,
139-
/// // but in a real application, you would likely read a chunk from the end
140-
/// // for example 1MB.
141-
/// let prefetched_bytes = file_bytes.clone();
142-
/// let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
143-
/// // push the prefetched bytes into the decoder
144-
/// decoder.push_ranges(vec![0..file_len], vec![prefetched_bytes]).unwrap();
145-
/// // The decoder will now be able to decode the metadata. Note in a real application,
146-
/// // unless you can guarantee that the pushed data is enough to decode the metadata,
147-
/// // you still need to call `try_decode` in a loop until it returns `DecodeResult::Data`
148-
/// // as shown in the previous example
149-
/// match decoder.try_decode() {
150-
/// Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
151-
/// other => { panic!("expected DecodeResult::Data, got: {other:?}") }
152-
/// }
153-
/// # }
154-
/// ```
122+
#[cfg_attr(
123+
feature = "arrow",
124+
doc = r##"
125+
```rust
126+
# use std::ops::Range;
127+
# use bytes::Bytes;
128+
# use arrow_array::record_batch;
129+
# use parquet::DecodeResult;
130+
# use parquet::arrow::ArrowWriter;
131+
# use parquet::errors::ParquetError;
132+
# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
133+
#
134+
# fn decode_metadata() -> Result<ParquetMetaData, ParquetError> {
135+
# let file_bytes = {
136+
# let mut buffer = vec![0];
137+
# let batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
138+
# let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
139+
# writer.write(&batch).unwrap();
140+
# writer.close().unwrap();
141+
# Bytes::from(buffer)
142+
# };
143+
#
144+
let file_len = file_bytes.len() as u64;
145+
// For this example, we "prefetch" all the bytes which we have in memory,
146+
// but in a real application, you would likely read a chunk from the end
147+
// for example 1MB.
148+
let prefetched_bytes = file_bytes.clone();
149+
let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
150+
// push the prefetched bytes into the decoder
151+
decoder.push_ranges(vec![0..file_len], vec![prefetched_bytes]).unwrap();
152+
// The decoder will now be able to decode the metadata. Note in a real application,
153+
// unless you can guarantee that the pushed data is enough to decode the metadata,
154+
// you still need to call `try_decode` in a loop until it returns `DecodeResult::Data`
155+
// as shown in the previous example
156+
match decoder.try_decode() {
157+
Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
158+
other => { panic!("expected DecodeResult::Data, got: {other:?}") }
159+
}
160+
# }
161+
```
162+
"##
163+
)]
155164
///
156165
/// # Example using [`AsyncRead`]
157166
///
@@ -160,49 +169,53 @@ use std::ops::Range;
160169
/// implement async IO itself. To use async IO, you simply write an async
161170
/// wrapper around it that reads the required byte ranges and pushes them into the
162171
/// decoder.
163-
///
164-
/// ```rust
165-
/// # use std::ops::Range;
166-
/// # use bytes::Bytes;
167-
/// use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
168-
/// # use arrow_array::record_batch;
169-
/// # use parquet::DecodeResult;
170-
/// # use parquet::arrow::ArrowWriter;
171-
/// # use parquet::errors::ParquetError;
172-
/// # use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
173-
/// #
174-
/// // This function decodes Parquet Metadata from anything that implements
175-
/// // [`AsyncRead`] and [`AsyncSeek`] such as a tokio::fs::File
176-
/// async fn decode_metadata(
177-
/// file_len: u64,
178-
/// mut async_source: impl AsyncRead + AsyncSeek + Unpin
179-
/// ) -> Result<ParquetMetaData, ParquetError> {
180-
/// // We need a ParquetMetaDataPushDecoder to decode the metadata.
181-
/// let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
182-
/// loop {
183-
/// match decoder.try_decode() {
184-
/// Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
185-
/// Ok(DecodeResult::NeedsData(ranges)) => {
186-
/// // The decoder needs more data
187-
/// //
188-
/// // In this example we use the AsyncRead and AsyncSeek traits to read the
189-
/// // required ranges from the async source.
190-
/// let mut data = Vec::with_capacity(ranges.len());
191-
/// for range in &ranges {
192-
/// let mut buffer = vec![0; (range.end - range.start) as usize];
193-
/// async_source.seek(std::io::SeekFrom::Start(range.start)).await?;
194-
/// async_source.read_exact(&mut buffer).await?;
195-
/// data.push(Bytes::from(buffer));
196-
/// }
197-
/// // Push the data into the decoder and try to decode again on the next iteration.
198-
/// decoder.push_ranges(ranges, data).unwrap();
199-
/// }
200-
/// Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
201-
/// Err(e) => return Err(e),
202-
/// }
203-
/// }
204-
/// }
205-
/// ```
172+
#[cfg_attr(
173+
feature = "arrow",
174+
doc = r##"
175+
```rust
176+
# use std::ops::Range;
177+
# use bytes::Bytes;
178+
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
179+
# use arrow_array::record_batch;
180+
# use parquet::DecodeResult;
181+
# use parquet::arrow::ArrowWriter;
182+
# use parquet::errors::ParquetError;
183+
# use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataPushDecoder};
184+
#
185+
// This function decodes Parquet Metadata from anything that implements
186+
// [`AsyncRead`] and [`AsyncSeek`] such as a tokio::fs::File
187+
async fn decode_metadata(
188+
file_len: u64,
189+
mut async_source: impl AsyncRead + AsyncSeek + Unpin
190+
) -> Result<ParquetMetaData, ParquetError> {
191+
// We need a ParquetMetaDataPushDecoder to decode the metadata.
192+
let mut decoder = ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
193+
loop {
194+
match decoder.try_decode() {
195+
Ok(DecodeResult::Data(metadata)) => { return Ok(metadata); } // decode successful
196+
Ok(DecodeResult::NeedsData(ranges)) => {
197+
// The decoder needs more data
198+
//
199+
// In this example we use the AsyncRead and AsyncSeek traits to read the
200+
// required ranges from the async source.
201+
let mut data = Vec::with_capacity(ranges.len());
202+
for range in &ranges {
203+
let mut buffer = vec![0; (range.end - range.start) as usize];
204+
async_source.seek(std::io::SeekFrom::Start(range.start)).await?;
205+
async_source.read_exact(&mut buffer).await?;
206+
data.push(Bytes::from(buffer));
207+
}
208+
// Push the data into the decoder and try to decode again on the next iteration.
209+
decoder.push_ranges(ranges, data).unwrap();
210+
}
211+
Ok(DecodeResult::Finished) => { unreachable!("returned metadata in previous match arm") }
212+
Err(e) => return Err(e),
213+
}
214+
}
215+
}
216+
```
217+
"##
218+
)]
206219
/// [`AsyncRead`]: tokio::io::AsyncRead
207220
#[derive(Debug)]
208221
pub struct ParquetMetaDataPushDecoder {

0 commit comments

Comments
 (0)