Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add capability of prefetch for rafs v6 format #319

Merged
merged 8 commits into from
Mar 10, 2022
6 changes: 2 additions & 4 deletions rafs/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,9 +464,7 @@ impl Rafs {
let prefetch_all = self.prefetch_all;

let _ = std::thread::spawn(move || {
if sb.meta.is_v5() {
Self::do_prefetch_v5(reader, prefetch_files, prefetch_all, sb, device);
}
Self::do_prefetch(reader, prefetch_files, prefetch_all, sb, device);
});
}

Expand All @@ -479,7 +477,7 @@ impl Rafs {
self.sb.superblock.root_ino()
}

fn do_prefetch_v5(
fn do_prefetch(
mut reader: RafsIoReader,
prefetch_files: Option<Vec<PathBuf>>,
prefetch_all: bool,
Expand Down
2 changes: 1 addition & 1 deletion rafs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ pub trait RafsIoWrite: Write + Seek + 'static {
}

/// Seek the writer to the `offset`.
fn seek_to_offset(&mut self, offset: u64) -> Result<u64> {
fn seek_offset(&mut self, offset: u64) -> Result<u64> {
self.seek(SeekFrom::Start(offset)).map_err(|e| {
error!("Seeking to offset {} from start fails, {}", offset, e);
e
Expand Down
35 changes: 33 additions & 2 deletions rafs/src/metadata/direct_v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,8 @@ impl RafsInode for OndiskInodeWrapper {

/// Check whether the inode is a hardlink.
fn is_hardlink(&self) -> bool {
todo!()
let inode = self.disk_inode();
inode.nlink() > 1 && self.is_reg()
}

/// Get inode number of the parent directory.
Expand Down Expand Up @@ -916,7 +917,37 @@ impl RafsInode for OndiskInodeWrapper {
&self,
descendants: &mut Vec<Arc<dyn RafsInode>>,
) -> Result<usize> {
todo!()
if !self.is_dir() {
return Err(enotdir!());
}

let mut child_dirs: Vec<Arc<dyn RafsInode>> = Vec::new();

// EROFS packs dot and dotdot, so skip both of them.
self.walk_children_inodes(2, &mut |inode: Option<Arc<dyn RafsInode>>,
name: OsString,
ino,
offset| {
// Safe to unwrap since it must have child inode.
if let Some(child_inode) = inode {
if child_inode.is_dir() {
trace!("Got dir {:?}", child_inode.name());
child_dirs.push(child_inode);
} else if !child_inode.is_empty_size() && child_inode.is_reg() {
bergwolf marked this conversation as resolved.
Show resolved Hide resolved
descendants.push(child_inode);
}
Ok(PostWalkAction::Continue)
} else {
Ok(PostWalkAction::Continue)
}
})
.unwrap();

for d in child_dirs {
d.collect_descendants_inodes(descendants)?;
}

Ok(0)
}

fn alloc_bio_vecs(&self, offset: u64, size: usize, user_io: bool) -> Result<Vec<BlobIoVec>> {
Expand Down
95 changes: 91 additions & 4 deletions rafs/src/metadata/layout/v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,11 @@ pub struct RafsV6SuperBlockExt {
s_blob_table_size: u32,
/// chunk size
s_chunk_size: u32,
s_prefetch_table_offset: u64,
s_prefetch_table_size: u32,
s_padding: u32,
/// Reserved
s_reserved: [u8; 232],
s_reserved: [u8; 216],
}

impl_bootstrap_converter!(RafsV6SuperBlockExt);
Expand Down Expand Up @@ -393,13 +396,25 @@ impl RafsV6SuperBlockExt {
u64
);
impl_pub_getter_setter!(blob_table_size, set_blob_table_size, s_blob_table_size, u32);
impl_pub_getter_setter!(
prefetch_table_size,
set_prefetch_table_size,
s_prefetch_table_size,
u32
);
impl_pub_getter_setter!(
prefetch_table_offset,
set_prefetch_table_offset,
s_prefetch_table_offset,
u64
);
}

impl RafsStore for RafsV6SuperBlockExt {
fn store(&self, w: &mut dyn RafsIoWrite) -> Result<usize> {
w.seek_to_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64)?;
w.seek_offset((EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as u64)?;
w.write_all(self.as_ref())?;
w.seek_to_offset(EROFS_BLOCK_SIZE as u64)?;
w.seek_offset(EROFS_BLOCK_SIZE as u64)?;

Ok(EROFS_BLOCK_SIZE as usize - (EROFS_SUPER_OFFSET + EROFS_SUPER_BLOCK_SIZE) as usize)
}
Expand All @@ -412,7 +427,10 @@ impl Default for RafsV6SuperBlockExt {
s_blob_table_offset: u64::to_le(0),
s_blob_table_size: u32::to_le(0),
s_chunk_size: u32::to_le(0),
s_reserved: [0u8; 232],
s_prefetch_table_offset: u64::to_le(0),
s_prefetch_table_size: u32::to_le(0),
s_padding: u32::to_le(0),
s_reserved: [0u8; 216],
changweige marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
Expand Down Expand Up @@ -1640,6 +1658,75 @@ impl RafsXAttrs {
}
}

/// In-memory form of the RAFS v6 inode prefetch table.
#[derive(Debug, Default, Clone)]
pub struct RafsV6PrefetchTable {
    /// Inode numbers of the files to prefetch.
    ///
    /// Note: these are inode numbers, not indexes into the inodes table.
    pub inodes: Vec<u32>,
}

impl RafsV6PrefetchTable {
    /// Create a new, empty instance of `RafsV6PrefetchTable`.
    pub fn new() -> RafsV6PrefetchTable {
        RafsV6PrefetchTable { inodes: vec![] }
    }

    /// Get content size of the inode prefetch table.
    ///
    /// NOTE(review): this reports `size_of::<u64>()` bytes per entry, although entries are
    /// `u32` and `store()` writes four bytes per entry. Presumably this is kept in sync with
    /// the loader, which derives the entry count by dividing the recorded size by
    /// `size_of::<u64>()` -- confirm before changing either side.
    pub fn size(&self) -> usize {
        self.len() * size_of::<u64>()
    }

    /// Get number of entries in the prefetch table.
    pub fn len(&self) -> usize {
        self.inodes.len()
    }

    /// Check whether the inode prefetch table is empty.
    pub fn is_empty(&self) -> bool {
        self.inodes.is_empty()
    }

    /// Add an inode into the inode prefetch table.
    pub fn add_entry(&mut self, ino: u32) {
        self.inodes.push(ino);
    }

    /// Store the inode prefetch table to a writer.
    ///
    /// The entries are sorted in place and then written as consecutive little-endian `u32`
    /// values at the writer's current position.
    ///
    /// Returns the number of bytes written, or the error reported by the writer.
    pub fn store(&mut self, w: &mut dyn RafsIoWrite) -> Result<usize> {
        // Sort prefetch table by inode index, hopefully, it can save time when mounting rafs
        // Because file data is dumped in the order of inode index.
        self.inodes.sort_unstable();

        // Serialize explicitly as little-endian instead of dumping host memory, so the
        // on-disk bytes do not depend on the endianness of the machine building the image.
        let mut data = Vec::with_capacity(self.inodes.len() * size_of::<u32>());
        for ino in &self.inodes {
            data.extend_from_slice(&ino.to_le_bytes());
        }
        w.write_all(&data)?;

        Ok(data.len())
    }

    /// Load an inode prefetch table from a reader.
    ///
    /// Seeks `r` to `offset` and reads `entries` little-endian `u32` inode numbers, replacing
    /// the current content of the table.
    ///
    /// Note: this changes the position of the reader. Generally, prefetch happens after
    /// loading bootstrap, so moving the offset is not expected to bring errors.
    ///
    /// Returns the number of bytes read, or the error reported while seeking or reading.
    pub fn load_prefetch_table_from(
        &mut self,
        r: &mut RafsIoReader,
        offset: u64,
        entries: usize,
    ) -> Result<usize> {
        self.inodes = vec![0u32; entries];

        let mut data = vec![0u8; entries * size_of::<u32>()];
        r.seek_to_offset(offset)?;
        r.read_exact(&mut data)?;

        // Decode each four-byte group as little-endian, mirroring `store()`.
        for (ino, raw) in self
            .inodes
            .iter_mut()
            .zip(data.chunks_exact(size_of::<u32>()))
        {
            *ino = u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]);
        }

        Ok(data.len())
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
58 changes: 57 additions & 1 deletion rafs/src/metadata/md_v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,19 @@
//
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashSet;
use std::io::Result;
use std::mem::size_of;
use std::sync::Arc;

use super::direct_v6::DirectSuperBlockV6;
use super::layout::v6::{RafsV6SuperBlock, RafsV6SuperBlockExt};
use super::layout::v6::{RafsV6PrefetchTable, RafsV6SuperBlock, RafsV6SuperBlockExt};
use super::layout::RAFS_SUPER_VERSION_V6;
use super::*;
use super::{RafsMode, RafsSuper, RafsSuperBlock, RafsSuperFlags};

use crate::RafsIoReader;
use crate::{RafsError, RafsResult};

impl RafsSuper {
pub(crate) fn try_load_v6(&mut self, r: &mut RafsIoReader) -> Result<bool> {
Expand Down Expand Up @@ -39,6 +44,15 @@ impl RafsSuper {
self.meta.meta_blkaddr = sb.s_meta_blkaddr;
self.meta.root_nid = sb.s_root_nid;

self.meta.prefetch_table_entries = ext_sb.prefetch_table_size() / size_of::<u64>() as u32;
self.meta.prefetch_table_offset = ext_sb.prefetch_table_offset();

trace!(
"prefetch table offset {} entries {} ",
self.meta.prefetch_table_offset,
self.meta.prefetch_table_entries
);

match self.mode {
RafsMode::Direct => {
let mut sb_v6 = DirectSuperBlockV6::new(&self.meta, self.validate_digest);
Expand All @@ -49,6 +63,48 @@ impl RafsSuper {
RafsMode::Cached => Err(enosys!("Rafs v6 does not support cached mode")),
}
}

/// Prefetch data for the inodes recorded in the RAFS v6 prefetch table.
///
/// Does nothing and returns `Ok(0)` when the superblock metadata records no prefetch
/// entries. Otherwise the table is loaded from `r`, every listed inode is handed to
/// `prefetch_data`, and whatever is still pending is passed to `fetcher` at the end.
///
/// Returns the number of hinted entries, or `RafsError::Prefetch` when loading the table
/// or prefetching an inode fails.
pub(crate) fn prefetch_data_v6<F>(&self, r: &mut RafsIoReader, fetcher: F) -> RafsResult<usize>
where
    F: Fn(&mut BlobIoVec),
{
    let hint_entries = match self.meta.prefetch_table_entries as usize {
        0 => return Ok(0),
        count => count,
    };
    let table_offset = self.meta.prefetch_table_offset;

    let mut table = RafsV6PrefetchTable::new();
    let mut seen_hardlinks: HashSet<u64> = HashSet::new();
    let mut pending = BlobIoVec::new();

    // Try to prefetch according to the list of files specified by the
    // builder's `--prefetch-policy fs` option.
    if let Err(e) = table.load_prefetch_table_from(r, table_offset, hint_entries) {
        return Err(RafsError::Prefetch(format!(
            "Failed in loading hint prefetch table at offset {}. {:?}",
            table_offset, e
        )));
    }

    trace!("prefetch table contents {:?}", table);

    // Inode number 0 is invalid, it was added because prefetch table has to be aligned,
    // so stop at the first one.
    for ino in table.inodes.into_iter().take_while(|&ino| ino != 0) {
        debug!("hint prefetch inode {}", ino);
        self.prefetch_data(ino as u64, &mut pending, &mut seen_hardlinks, &fetcher)
            .map_err(|e| RafsError::Prefetch(e.to_string()))?;
    }

    // The left chunks whose size is smaller than 4MB will be fetched here.
    fetcher(&mut pending);

    Ok(hint_entries)
}
}

#[cfg(test)]
Expand Down
14 changes: 11 additions & 3 deletions rafs/src/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,15 +553,16 @@ impl RafsSuper {

/// Convert a file path to an inode number.
pub fn ino_from_path(&self, f: &Path) -> Result<u64> {
let root_ino = self.superblock.root_ino();
if f == Path::new("/") {
return Ok(ROOT_ID);
return Ok(root_ino);
}

if !f.starts_with("/") {
return Err(einval!());
}

let mut parent = self.get_inode(ROOT_ID, self.validate_digest)?;
let mut parent = self.get_inode(root_ino, self.validate_digest)?;

let entries = f
.components()
Expand Down Expand Up @@ -637,6 +638,8 @@ impl RafsSuper {
Ok(())
} else if self.meta.is_v5() {
self.prefetch_data_v5(r, fetcher).map(|_| ())
} else if self.meta.is_v6() {
self.prefetch_data_v6(r, fetcher).map(|_| ())
} else {
Err(RafsError::Prefetch(
"Unknown filesystem version, prefetch disabled".to_string(),
Expand Down Expand Up @@ -711,7 +714,12 @@ impl RafsSuper {
for i in descendants.iter() {
Self::prefetch_inode(i, head_desc, hardlinks, try_prefetch)?;
}
} else if !inode.is_empty_size() {
} else if !inode.is_empty_size() && inode.is_reg() {
// An empty regular file will also be packed into the nydus image,
// in which case it has a size of zero.
// Moreover, for rafs v5, a symlink has a file size of zero but a non-zero
// symlink size, while for rafs v6 the symlink size is also represented by i_size.
// So we have to restrict the condition to non-empty regular files here.
Self::prefetch_inode(&inode, head_desc, hardlinks, try_prefetch)?;
}

Expand Down
Loading