Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Made IPC projection not sort projection #1082

Merged
merged 1 commit into from
Jun 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions src/io/ipc/read/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,31 +285,31 @@ pub fn prepare_projection(

let fields = projection.iter().map(|x| fields[*x].clone()).collect();

// selected index; index in
let sorted_projection = projection
.iter()
.copied()
.enumerate()
.map(|x| (x.1, x.0))
.collect::<HashMap<_, _>>(); // e.g. [2, 1] -> {2: 0, 1: 1}
projection.sort_unstable(); // e.g. [2, 1] -> [1, 2]
// todo: find way to do this more efficiently
let mut indices = (0..projection.len()).collect::<Vec<_>>();
indices.sort_unstable_by_key(|&i| &projection[i]);
let map = indices.iter().copied().enumerate().fold(
HashMap::default(),
|mut acc, (index, new_index)| {
if !acc.contains_key(&new_index) {
acc.insert(index, new_index);
};
acc
},
);
projection.sort_unstable();

(projection, sorted_projection, fields)
(projection, map, fields)
}

/// Re-orders the arrays of `chunk` according to the projection and wraps the
/// result in a new `Chunk`.
///
/// NOTE(review): this span comes from a diff view whose change markers were
/// stripped, so it shows two alternative bodies back to back: a lookup-based
/// one that reads both `projection` and `map`, and a swap-based one that reads
/// only `map`. Each starts by calling `chunk.into_arrays()`; only one of them
/// is meant to be present at a time. The call sites shown further down pass
/// either `(chunk, projection, map)` or `(chunk, map)` accordingly.
pub fn apply_projection(
chunk: Chunk<Box<dyn Array>>,
projection: &[usize],
map: &HashMap<usize, usize>,
) -> Chunk<Box<dyn Array>> {
// re-order according to projection
// Lookup-based variant: for every entry `x` of `projection`, look up its
// position in `map` and clone the array stored at that position.
let arrays = chunk.into_arrays();
let arrays = projection
.iter()
.map(|x| {
// `unwrap` panics if `x` is not a key of `map`.
let index = map.get(x).unwrap();
arrays[*index].clone()
})
.collect();
// Swap-based variant: apply every `(old, new)` pair of `map` as an in-place
// swap on the arrays.
// NOTE(review): swaps are order-dependent, and hash-map iteration order is
// presumably unspecified here. When two pairs share an index, e.g.
// `{0: 1, 1: 2}` on `[c0, c1, c2]`, visiting `(0, 1)` first yields
// `[c1, c2, c0]` while visiting `(1, 2)` first yields `[c2, c0, c1]`.
// Verify with projections of three or more columns that form a cycle.
let mut arrays = chunk.into_arrays();
map.iter().for_each(|(old, new)| {
arrays.swap(*old, *new);
});
Chunk::new(arrays)
}
4 changes: 2 additions & 2 deletions src/io/ipc/read/file_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ impl<'a> FileStream<'a> {
&mut block_buffer,
).await?;

let chunk = if let Some((projection, map)) = &projection {
let chunk = if let Some((_, map)) = &projection {
// re-order according to projection
apply_projection(chunk, projection, map)
apply_projection(chunk, map)
} else {
chunk
};
Expand Down
4 changes: 2 additions & 2 deletions src/io/ipc/read/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@ impl<R: Read + Seek> Iterator for FileReader<R> {
&mut self.buffer,
);

let chunk = if let Some((projection, map, _)) = &self.projection {
let chunk = if let Some((_, map, _)) = &self.projection {
// re-order according to projection
chunk.map(|chunk| apply_projection(chunk, projection, map))
chunk.map(|chunk| apply_projection(chunk, map))
} else {
chunk
};
Expand Down
26 changes: 20 additions & 6 deletions tests/it/io/ipc/read/file.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::fs::File;

use arrow2::chunk::Chunk;
use arrow2::error::Result;
use arrow2::io::ipc::read::*;

Expand Down Expand Up @@ -166,18 +167,29 @@ fn test_projection(version: &str, file_name: &str, columns: Vec<usize>) -> Resul

let metadata = read_file_metadata(&mut file)?;

let expected = columns
let (_, _, chunks) = read_gzip_json(version, file_name)?;

let expected_fields = columns
.iter()
.copied()
.map(|x| metadata.schema.fields[x].clone())
.collect::<Vec<_>>();

let mut reader = FileReader::new(&mut file, metadata, Some(columns));
let expected_chunks = chunks.into_iter().map(|chunk| {
let columns = columns
.iter()
.copied()
.map(|x| chunk.arrays()[x].clone())
.collect::<Vec<_>>();
Chunk::new(columns)
});

let reader = FileReader::new(&mut file, metadata, Some(columns.clone()));

assert_eq!(reader.schema().fields, expected);
assert_eq!(reader.schema().fields, expected_fields);

reader.try_for_each(|rhs| {
assert_eq!(rhs?.arrays().len(), expected.len());
reader.zip(expected_chunks).try_for_each(|(lhs, rhs)| {
assert_eq!(&lhs?.arrays()[0], &rhs.arrays()[0]);
Result::Ok(())
})?;
Ok(())
Expand All @@ -189,5 +201,7 @@ fn read_projected() -> Result<()> {
test_projection("1.0.0-littleendian", "generated_dictionary", vec![2])?;
test_projection("1.0.0-littleendian", "generated_nested", vec![0])?;

test_projection("1.0.0-littleendian", "generated_primitive", vec![2, 1])
test_projection("1.0.0-littleendian", "generated_primitive", vec![2, 1])?;

test_projection("1.0.0-littleendian", "generated_primitive", vec![0, 2, 1])
}