Skip to content

Commit 10aae0f

Browse files
committed
Merge branch 'alamb/parquet_decoder' of github.com:alamb/arrow-rs into alamb/parquet_decoder
2 parents 0186759 + 7ccdb1a commit 10aae0f

File tree

3 files changed

+12
-6
lines changed

3 files changed

+12
-6
lines changed

parquet/src/arrow/in_memory_row_group.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,14 @@ impl RowGroups for InMemoryRowGroup<'_> {
256256
}
257257
}
258258

259-
/// An in-memory column chunk
259+
/// An in-memory column chunk.
260+
/// This allows us to hold either dense column chunks or sparse column chunks and easily
261+
/// access them by offset.
260262
#[derive(Clone, Debug)]
261263
pub(crate) enum ColumnChunkData {
262-
/// Column chunk data representing only a subset of data pages
264+
/// Column chunk data representing only a subset of data pages.
265+
/// For example, a row selection (possibly caused by a filter in a query) may cause us to read only
266+
/// a subset of the rows in the column.
263267
Sparse {
264268
/// Length of the full column chunk
265269
length: usize,

parquet/src/arrow/push_decoder/reader_builder/filter.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ pub(super) struct FilterInfo {
4747
/// but it owns the ProjectionMask and RowGroupCache
4848
#[derive(Debug)]
4949
pub(super) struct CacheInfo {
50-
/// The columns to cache in the predicate cache
50+
/// The columns to cache in the predicate cache.
51+
/// Normally these are the columns that filters may look at. For example,
52+
/// if we have a filter like `(a + 10 > 5) AND (a + b = 0)` we cache `a` to avoid re-reading it between evaluating `a + 10 > 5` and `a + b = 0`.
5153
cache_projection: ProjectionMask,
5254
row_group_cache: Arc<Mutex<RowGroupCache>>,
5355
}
@@ -69,9 +71,9 @@ impl CacheInfo {
6971
}
7072

7173
pub(super) enum AdvanceResult {
72-
/// advanced to the next predicate
74+
/// Advanced to the next predicate
7375
Continue(FilterInfo),
74-
/// no more predicates returns the row filter and cache info
76+
/// No more predicates; returns the row filter and cache info
7577
Done(RowFilter, CacheInfo),
7678
}
7779

parquet/src/arrow/push_decoder/reader_builder/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ impl RowGroupReaderBuilder {
211211
/// call to [`Self::take_state`].
212212
///
213213
/// Any code that calls this method must ensure that the state is put back
214-
/// before returning, otherwise the reader error next time it is called
214+
/// before returning, otherwise the reader will error next time it is called
215215
fn take_state(&mut self) -> Result<RowGroupDecoderState, ParquetError> {
216216
self.state.take().ok_or_else(|| {
217217
ParquetError::General(String::from(

0 commit comments

Comments
 (0)