Skip to content

Commit

Permalink
rafs: prefetch based on blob chunks rather than files
Browse files Browse the repository at this point in the history
Apply different policies for the v5 and v6 formats, as rafs v6 blobs are capable of
downloading and decompressing chunks all by themselves. For rafs v6, directly perform
a chunk-based full prefetch to reduce the number of requests to the container registry
and the P2P cluster.

Signed-off-by: Changwei Ge <gechangwei@bytedance.com>
  • Loading branch information
changweige committed Nov 23, 2022
1 parent 3efd75a commit 2a9ad1b
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 16 deletions.
74 changes: 58 additions & 16 deletions rafs/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ use serde::Deserialize;

use nydus_api::http::{BlobPrefetchConfig, FactoryConfig};
use nydus_storage::device::{BlobDevice, BlobPrefetchRequest};
use nydus_utils::metrics::{self, FopRecorder, StatsFop::*};
use nydus_utils::{
div_round_up,
metrics::{self, FopRecorder, StatsFop::*},
};
use storage::RAFS_DEFAULT_CHUNK_SIZE;

use crate::metadata::{
Expand Down Expand Up @@ -505,9 +508,8 @@ impl Rafs {
device: BlobDevice,
) {
// First do range based prefetch for rafs v6.
if sb.meta.is_v6() {
if sb.meta.is_v6() && !sb.superblock.get_blob_infos().is_empty() {
let mut prefetches = Vec::new();

for blob in sb.superblock.get_blob_infos() {
let sz = blob.readahead_size();
if sz > 0 {
Expand All @@ -530,7 +532,14 @@ impl Rafs {
}
}

let mut ignore_prefetch_all = prefetch_files
let inlay_prefetch_all = sb
.is_inlay_prefetch_all(&mut reader)
.map_err(|e| error!("Detect prefetch table error {}", e))
.unwrap_or_default();

let mut ignore_prefetch_all = false;

let startup_prefetch_all = prefetch_files
.as_ref()
.map(|f| f.len() == 1 && f[0].as_os_str() == "/")
.unwrap_or(false);
Expand All @@ -547,23 +556,56 @@ impl Rafs {
}
});
match res {
Ok(true) => ignore_prefetch_all = true,
Ok(true) => {
ignore_prefetch_all = true;
warn!("Root inode was found, but it should not prefetch all files!")
}
Ok(false) => {}
Err(e) => info!("No file to be prefetched {:?}", e),
}

// Last optionally prefetch all data
if prefetch_all && !ignore_prefetch_all {
let root = vec![root_ino];
let res = sb.prefetch_files(&mut reader, root_ino, Some(root), &|desc| {
if desc.bi_size > 0 {
device.prefetch(&[desc], &[]).unwrap_or_else(|e| {
warn!("Prefetch error, {:?}", e);
});
// Apply different policies for the v5 and v6 formats, as rafs v6 blobs are capable of
// downloading and decompressing chunks all by themselves. For rafs v6, directly perform
// a chunk-based full prefetch.
if !ignore_prefetch_all && (inlay_prefetch_all || prefetch_all || startup_prefetch_all) {
if sb.meta.is_v6() {
let batch_size = 1024 * 1024 * 2;

for blob in sb.superblock.get_blob_infos() {
let blob_size = blob.compressed_size();
let count = div_round_up(blob_size, batch_size);

let mut pre_offset = 0u64;

for _i in 0..count {
let req = BlobPrefetchRequest {
blob_id: blob.blob_id().to_owned(),
offset: pre_offset,
len: cmp::min(batch_size, blob_size - pre_offset),
};
pre_offset += batch_size;
if pre_offset > blob_size {
break;
}

info!("prefetch range req {:?}", req);
if let Err(e) = device.prefetch(&[], &[req]) {
warn!("failed to prefetch blob data, {}", e);
}
}
}
} else {
let root = vec![root_ino];
let res = sb.prefetch_files(&mut reader, root_ino, Some(root), &|desc| {
if desc.bi_size > 0 {
device.prefetch(&[desc], &[]).unwrap_or_else(|e| {
warn!("Prefetch error, {:?}", e);
});
}
});
if let Err(e) = res {
info!("No file to be prefetched {:?}", e);
}
});
if let Err(e) = res {
info!("No file to be prefetched {:?}", e);
}
}
}
Expand Down
47 changes: 47 additions & 0 deletions rafs/src/metadata/md_v6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,53 @@ impl RafsSuper {
}
}

/// Check whether the prefetch table recorded in the metadata requests prefetching everything.
///
/// Returns `Ok(true)` only when the prefetch table holds exactly one entry and that entry is
/// the root inode of the filesystem. Any other table content yields `Ok(false)`.
///
/// The table is read from `r` at `self.meta.prefetch_table_offset`, using the v6 or v5 table
/// layout depending on `self.meta.is_v6()`.
///
/// # Errors
///
/// Returns `RafsError::Prefetch` when the prefetch table cannot be loaded from `r`.
pub(crate) fn is_inlay_prefetch_all(&self, r: &mut RafsIoReader) -> RafsResult<bool> {
    // Only a table with a single entry can be a "prefetch all" hint, regardless of the
    // format version, so bail out before touching the reader otherwise.
    let hint_entries = self.meta.prefetch_table_entries as usize;
    if hint_entries != 1 {
        return Ok(false);
    }

    let offset = self.meta.prefetch_table_offset;
    // Shared error conversion for both table layouts; takes `&dyn Debug` so it does not
    // depend on the concrete error type returned by either loader.
    let load_failure = |e: &dyn std::fmt::Debug| {
        RafsError::Prefetch(format!(
            "Failed in loading hint prefetch table at offset {}. {:?}",
            offset, e
        ))
    };

    // Use `first()` rather than `[0]` so a table that unexpectedly loads no inode is
    // reported as "not prefetch all" instead of panicking.
    // TODO: Is u64 large enough for rafs v6?
    let first_inode = if self.meta.is_v6() {
        let mut prefetch_table = RafsV6PrefetchTable::new();
        prefetch_table
            .load_prefetch_table_from(r, offset, hint_entries)
            .map_err(|e| load_failure(&e))?;
        prefetch_table.inodes.first().map(|&ino| ino as u64)
    } else {
        let mut prefetch_table = RafsV5PrefetchTable::new();
        prefetch_table
            .load_prefetch_table_from(r, offset, hint_entries)
            .map_err(|e| load_failure(&e))?;
        prefetch_table.inodes.first().map(|&ino| ino as u64)
    };

    Ok(first_inode.map_or(false, |ino| ino == self.superblock.root_ino()))
}

pub(crate) fn prefetch_data_v6<F>(
&self,
r: &mut RafsIoReader,
Expand Down
1 change: 1 addition & 0 deletions rafs/src/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ impl RafsSuper {
///
/// Each inode passed into should correspond to directory. And it already does the file type
/// check inside.
/// Returns `Ok(true)` if the root inode is found while prefetching, which means all files should be prefetched.
pub fn prefetch_files(
&self,
r: &mut RafsIoReader,
Expand Down
2 changes: 2 additions & 0 deletions storage/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,8 @@ impl BlobIoRange {
/// A `BlobPrefetchRequest` object advises to prefetch data range [offset, offset + len) from
/// blob `blob_id`. The prefetch operation should be asynchronous, and cache hit for filesystem
/// read operations should validate data integrity.

#[derive(Debug)]
pub struct BlobPrefetchRequest {
/// The ID of the blob to prefetch data for.
pub blob_id: String,
Expand Down

0 comments on commit 2a9ad1b

Please sign in to comment.