fix: Don't panic when reading empty files
aawsome committed Dec 6, 2024
1 parent 6a50c12 commit 0dae71f
Showing 2 changed files with 83 additions and 39 deletions.
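
The panic comes from the offset lookup that the diff below removes: for an empty file the old blob list holds only the `usize::MAX` sentinel, so `partition_point` returns 0 and the trailing `- 1` underflows; the new `ContentOffset::offset` handles the empty case explicitly. A minimal standalone sketch of that failure mode (std only, illustrative names, not the crate's actual types):

    // Pre-fix lookup sketch: find the blob whose start offset covers `offset`.
    // The last entry is a usize::MAX sentinel, mirroring the removed BlobInfo code below.
    fn old_blob_index(starts_at: &[usize], offset: usize) -> usize {
        // For an empty file only the sentinel is present, so partition_point
        // returns 0 and this subtraction overflows (panic in debug builds).
        starts_at.partition_point(|s| *s <= offset) - 1
    }

    fn main() {
        // Two blobs starting at 0 and 15, plus the sentinel: offset 20 falls in blob 1.
        assert_eq!(old_blob_index(&[0, 15, usize::MAX], 20), 1);
        // Empty file: panics (debug) or yields usize::MAX and a bad index later (release).
        let _ = old_blob_index(&[usize::MAX], 0);
    }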
114 changes: 77 additions & 37 deletions crates/core/src/vfs.rs
@@ -455,17 +455,8 @@ impl Vfs {
 #[derive(Debug)]
 pub struct OpenFile {
     // The list of blobs
-    content: Vec<BlobInfo>,
-}
-
-// Information about the blob: 1) The id 2) The cumulated sizes of all blobs prior to this one, a.k.a the starting point of this blob.
-#[derive(Debug)]
-struct BlobInfo {
-    // [`Id`] of the blob
-    id: DataId,
-
-    // the start position of this blob within the file
-    starts_at: usize,
+    content: Vec<DataId>,
+    offsets: ContentOffset,
 }

 impl OpenFile {
@@ -488,26 +479,15 @@ impl OpenFile {
     ///
     /// * Panics if the `Node` has no content
     pub fn from_node<P, S: IndexedFull>(repo: &Repository<P, S>, node: &Node) -> Self {
-        let mut start = 0;
-        let mut content: Vec<_> = node
-            .content
-            .as_ref()
-            .unwrap()
-            .iter()
-            .map(|id| {
-                let starts_at = start;
-                start += repo.index().get_data(id).unwrap().data_length() as usize;
-                BlobInfo { id: *id, starts_at }
-            })
-            .collect();
+        let content: Vec<_> = node.content.as_ref().unwrap().clone();

-        // content is assumed to be partitioned, so we add a starts_at:MAX entry
-        content.push(BlobInfo {
-            id: DataId::default(),
-            starts_at: usize::MAX,
-        });
+        let offsets = ContentOffset::from_sizes(
+            content
+                .iter()
+                .map(|id| repo.index().get_data(id).unwrap().data_length() as usize),
+        );

-        Self { content }
+        Self { content, offsets }
     }

     /// Read the `OpenFile` at the given `offset` from the `repo`.
@@ -530,19 +510,16 @@ impl OpenFile {
     pub fn read_at<P, S: IndexedFull>(
         &self,
         repo: &Repository<P, S>,
-        mut offset: usize,
+        offset: usize,
         mut length: usize,
     ) -> RusticResult<Bytes> {
-        // find the start of relevant blobs => find the largest index such that self.content[i].starts_at <= offset, but
-        // self.content[i+1] > offset (note that a last dummy element has been added)
-        let mut i = self.content.partition_point(|c| c.starts_at <= offset) - 1;
-
-        offset -= self.content[i].starts_at;
+        let (mut i, mut offset) = self.offsets.offset(offset);

         let mut result = BytesMut::with_capacity(length);

-        while length > 0 && i < self.content.len() - 1 {
-            let data = repo.get_blob_cached(&BlobId::from(*self.content[i].id), BlobType::Data)?;
+        // The case of empty node.content is also correctly handled here
+        while length > 0 && i < self.content.len() {
+            let data = repo.get_blob_cached(&BlobId::from(self.content[i]), BlobType::Data)?;

             if offset > data.len() {
                 // we cannot read behind the blob. This only happens if offset is too large to fit in the last blob
@@ -563,3 +540,66 @@ impl OpenFile {
         Ok(result.into())
     }
 }
+
+#[derive(Debug)]
+struct ContentOffset(Vec<usize>);
+
+impl ContentOffset {
+    fn from_sizes(sizes: impl IntoIterator<Item = usize>) -> Self {
+        let mut start = 0;
+        let mut offsets: Vec<_> = sizes
+            .into_iter()
+            .map(|size| {
+                let starts_at = start;
+                start += size;
+                starts_at
+            })
+            .collect();
+
+        if !offsets.is_empty() {
+            // offsets is assumed to be partitioned, so we add a starts_at:MAX entry
+            offsets.push(usize::MAX);
+        }
+        Self(offsets)
+    }
+
+    fn offset(&self, mut offset: usize) -> (usize, usize) {
+        if self.0.is_empty() {
+            return (0, 0);
+        }
+        // find the start of relevant blobs => find the largest index such that self.offsets[i] <= offset, but
+        // self.offsets[i+1] > offset (note that a last dummy element with usize::MAX has been added to ensure we always have two partitions)
+        // If offsets is non-empty, then offsets[0] = 0, hence partition_point returns an index >=1.
+        let i = self.0.partition_point(|o| o <= &offset) - 1;
+        offset -= self.0[i];
+        (i, offset)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn content_offsets_empty_sizes() {
+        let offsets = ContentOffset::from_sizes([]);
+        assert_eq!(offsets.offset(0), (0, 0));
+        assert_eq!(offsets.offset(42), (0, 0));
+    }
+
+    #[test]
+    fn content_offsets_size() {
+        let offsets = ContentOffset::from_sizes([15]);
+        assert_eq!(offsets.offset(0), (0, 0));
+        assert_eq!(offsets.offset(5), (0, 5));
+        assert_eq!(offsets.offset(20), (0, 20));
+    }
+    #[test]
+    fn content_offsets_sizes() {
+        let offsets = ContentOffset::from_sizes([15, 24]);
+        assert_eq!(offsets.offset(0), (0, 0));
+        assert_eq!(offsets.offset(5), (0, 5));
+        assert_eq!(offsets.offset(20), (1, 5));
+        assert_eq!(offsets.offset(42), (1, 27));
+    }
+}
8 changes: 6 additions & 2 deletions crates/core/tests/integration.rs
@@ -456,11 +456,15 @@ fn test_ls_and_read(

     let data = repo.read_file_at(&file, 0, 21)?; // read full content
     assert_eq!(Bytes::from("This is a test file.\n"), &data);
-    let data2 = repo.read_file_at(&file, 0, 4096)?; // read beyond file end
-    assert_eq!(data2, &data);
+    assert_eq!(data, repo.read_file_at(&file, 0, 4096)?); // read beyond file end
     assert_eq!(Bytes::new(), repo.read_file_at(&file, 25, 1)?); // offset beyond file end
     assert_eq!(Bytes::from("test"), repo.read_file_at(&file, 10, 4)?); // read partial content

+    // test reading an empty file from the repository
+    let path: PathBuf = ["test", "0", "tests", "empty-file"].iter().collect();
+    let node = entries.get(&path).unwrap();
+    let file = repo.open_file(node)?;
+    assert_eq!(Bytes::new(), repo.read_file_at(&file, 0, 0)?); // empty files
     Ok(())
 }
