Add --blob-cache-dir arg to generate raw blob cache and meta #1433

Merged 1 commit on Oct 10, 2023
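
This change adds an `Artifact` trait so blob writers can be passed as `&mut dyn Artifact`, a `NoopArtifactWriter` that only tracks the write position, and a `BlobCacheGenerator` that dumps the uncompressed blob data and blob meta into the directory given by the new `--blob-cache-dir` option. A rough sketch of how the option is presumably wired into the build context; the helper, its name, the crate path, and the use of `ArtifactStorage::FileDir` are assumptions based on the existing builder API, not part of this diff:

```rust
use std::path::PathBuf;

use anyhow::Result;
// Crate path is illustrative; these types live in builder/src/core/context.rs.
use nydus_builder::{ArtifactStorage, BlobCacheGenerator, BuildContext};

/// Hypothetical CLI glue: when `--blob-cache-dir` is given, attach a
/// BlobCacheGenerator so the builder also emits the raw blob cache files.
fn attach_blob_cache(ctx: &mut BuildContext, blob_cache_dir: Option<PathBuf>) -> Result<()> {
    if let Some(dir) = blob_cache_dir {
        // FileDir storage writes artifacts into a directory and renames them
        // to their final names in finalize().
        let storage = ArtifactStorage::FileDir(dir);
        ctx.blob_cache_generator = Some(BlobCacheGenerator::new(storage)?);
    }
    Ok(())
}
```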
2 changes: 2 additions & 0 deletions builder/src/compact.rs
@@ -21,6 +21,8 @@ use nydus_utils::{digest, try_round_up_4k};
use serde::{Deserialize, Serialize};
use sha2::Digest;

use crate::core::context::Artifact;

use super::core::blob::Blob;
use super::core::bootstrap::Bootstrap;
use super::{
16 changes: 8 additions & 8 deletions builder/src/core/blob.rs
@@ -3,7 +3,6 @@
// SPDX-License-Identifier: Apache-2.0

use std::borrow::Cow;
use std::io::Write;
use std::slice;

use anyhow::{Context, Result};
@@ -16,9 +15,8 @@ use sha2::digest::Digest;

use super::layout::BlobLayout;
use super::node::Node;
use crate::{
ArtifactWriter, BlobContext, BlobManager, BuildContext, ConversionType, Feature, Tree,
};
use crate::core::context::Artifact;
use crate::{BlobContext, BlobManager, BuildContext, ConversionType, Feature, Tree};

/// Generator for RAFS data blob.
pub(crate) struct Blob {}
@@ -29,7 +27,7 @@ impl Blob {
ctx: &BuildContext,
tree: &Tree,
blob_mgr: &mut BlobManager,
blob_writer: &mut ArtifactWriter,
blob_writer: &mut dyn Artifact,
) -> Result<()> {
match ctx.conversion_type {
ConversionType::DirectoryToRafs => {
@@ -101,7 +99,7 @@ impl Blob {
fn finalize_blob_data(
ctx: &BuildContext,
blob_mgr: &mut BlobManager,
blob_writer: &mut ArtifactWriter,
blob_writer: &mut dyn Artifact,
) -> Result<()> {
// Dump buffered batch chunk data if exists.
if let Some(ref batch) = ctx.blob_batch_generator {
@@ -159,7 +157,7 @@ impl Blob {
pub(crate) fn dump_meta_data(
ctx: &BuildContext,
blob_ctx: &mut BlobContext,
blob_writer: &mut ArtifactWriter,
blob_writer: &mut dyn Artifact,
) -> Result<()> {
// Dump blob meta for v6 when it has chunks or bootstrap is to be inlined.
if !blob_ctx.blob_meta_info_enabled || blob_ctx.uncompressed_blob_size == 0 {
@@ -194,7 +192,6 @@ impl Blob {
} else if ctx.blob_tar_reader.is_some() {
header.set_separate_blob(true);
};

let mut compressor = Self::get_compression_algorithm_for_meta(ctx);
let (compressed_data, compressed) = compress::compress(ci_data, compressor)
.with_context(|| "failed to compress blob chunk info array".to_string())?;
@@ -223,6 +220,9 @@
}

blob_ctx.blob_meta_header = header;
if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() {
blob_cache.write_blob_meta(ci_data, &header)?;
}
let encrypted_header =
crypt::encrypt_with_context(header.as_bytes(), cipher_obj, cipher_ctx, encrypt)?;
let header_size = encrypted_header.len();
168 changes: 140 additions & 28 deletions builder/src/core/context.rs
@@ -10,6 +10,7 @@ use std::collections::{HashMap, VecDeque};
use std::convert::TryFrom;
use std::fs::{remove_file, rename, File, OpenOptions};
use std::io::{BufWriter, Cursor, Read, Seek, Write};
use std::mem::size_of;
use std::os::unix::fs::FileTypeExt;
use std::path::{Display, Path, PathBuf};
use std::str::FromStr;
@@ -40,7 +41,7 @@ use nydus_storage::meta::{
BlobMetaChunkArray, BlobMetaChunkInfo, ZranContextGenerator,
};
use nydus_utils::digest::DigestData;
use nydus_utils::{compress, digest, div_round_up, round_down, BufReaderInfo};
use nydus_utils::{compress, digest, div_round_up, round_down, try_round_up_4k, BufReaderInfo};

use super::node::ChunkSource;
use crate::core::tree::TreeNode;
@@ -193,7 +194,13 @@ impl Write for ArtifactMemoryWriter {
}
}

struct ArtifactFileWriter(ArtifactWriter);
struct ArtifactFileWriter(pub ArtifactWriter);

impl ArtifactFileWriter {
pub fn finalize(&mut self, name: Option<String>) -> Result<()> {
self.0.finalize(name)
}
}

impl RafsIoWrite for ArtifactFileWriter {
fn as_any(&self) -> &dyn Any {
@@ -215,6 +222,12 @@
}
}

impl ArtifactFileWriter {
pub fn set_len(&mut self, s: u64) -> std::io::Result<()> {
self.0.file.get_mut().set_len(s)
}
}

impl Seek for ArtifactFileWriter {
fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
self.0.file.seek(pos)
@@ -231,6 +244,37 @@
}
}

pub trait Artifact: Write {
fn pos(&self) -> Result<u64>;
fn finalize(&mut self, name: Option<String>) -> Result<()>;
}

#[derive(Default)]
pub struct NoopArtifactWriter {
pos: usize,
}

impl Write for NoopArtifactWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.pos += buf.len();
Ok(buf.len())
}

fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}

impl Artifact for NoopArtifactWriter {
fn pos(&self) -> Result<u64> {
Ok(self.pos as u64)
}

fn finalize(&mut self, _name: Option<String>) -> Result<()> {
Ok(())
}
}
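
The `NoopArtifactWriter` exists so call sites that now take `&mut dyn Artifact` can still be driven when no blob file should actually be written: it keeps the position accounting (which tar headers and blob offsets rely on) but drops the bytes. A sketch of the intended selection, assuming a helper like this in the same module at the call sites (it is not part of this diff):

```rust
// Pick a real writer when the blob should be persisted, a no-op writer
// otherwise; both are used through the new `Artifact` trait.
fn make_blob_writer(storage: Option<ArtifactStorage>) -> Result<Box<dyn Artifact>> {
    match storage {
        Some(s) => Ok(Box::new(ArtifactWriter::new(s)?)),
        None => Ok(Box::new(NoopArtifactWriter::default())),
    }
}
```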

/// ArtifactWriter provides a writer to allow writing bootstrap
/// or blob data to a single file or in a directory.
pub struct ArtifactWriter {
@@ -308,36 +352,18 @@ impl ArtifactWriter {
}
}
}
}

impl Artifact for ArtifactWriter {
/// Get the current write position.
pub fn pos(&self) -> Result<u64> {
fn pos(&self) -> Result<u64> {
Ok(self.pos as u64)
}

// The `inline-bootstrap` option merges the blob and bootstrap into one
// file. We need some header to index the location of the blob and bootstrap,
// write_tar_header uses tar header that arranges the data as follows:
// data | tar_header | data | tar_header
// This is a tar-like structure, except that we put the tar header after the
// data. The advantage is that we do not need to determine the size of the data
// first, so that we can write the blob data by stream without seek to improve
// the performance of the blob dump by using fifo.
fn write_tar_header(&mut self, name: &str, size: u64) -> Result<Header> {
let mut header = Header::new_gnu();
header.set_path(Path::new(name))?;
header.set_entry_type(EntryType::Regular);
header.set_size(size);
// The checksum must be set to ensure that the tar reader implementation
// in golang can correctly parse the header.
header.set_cksum();
self.write_all(header.as_bytes())?;
Ok(header)
}

/// Finalize the metadata/data blob.
///
/// When `name` is None, it means that the blob is empty and should be removed.
pub fn finalize(&mut self, name: Option<String>) -> Result<()> {
fn finalize(&mut self, name: Option<String>) -> Result<()> {
self.file.flush()?;

if let Some(n) = name {
@@ -367,6 +393,72 @@
}
}

pub struct BlobCacheGenerator {
blob_data: Mutex<ArtifactFileWriter>,
blob_meta: Mutex<ArtifactFileWriter>,
}

impl BlobCacheGenerator {
pub fn new(storage: ArtifactStorage) -> Result<Self> {
Ok(BlobCacheGenerator {
blob_data: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage.clone())?)),
blob_meta: Mutex::new(ArtifactFileWriter(ArtifactWriter::new(storage)?)),
})
}

pub fn write_blob_meta(
&self,
data: &[u8],
header: &BlobCompressionContextHeader,
) -> Result<()> {
let mut guard = self.blob_meta.lock().unwrap();
let aligned_uncompressed_size = try_round_up_4k(data.len() as u64).ok_or(anyhow!(
format!("invalid input {} for try_round_up_4k", data.len())
))?;
guard.set_len(
aligned_uncompressed_size + size_of::<BlobCompressionContextHeader>() as u64,
)?;
guard
.write_all(data)
.context("failed to write blob meta data")?;
guard.seek(std::io::SeekFrom::Start(aligned_uncompressed_size))?;
guard
.write_all(header.as_bytes())
.context("failed to write blob meta header")?;
Ok(())
}
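
The resulting `.blob.meta` cache file is therefore laid out as the uncompressed chunk-info array, padded up to a 4KiB boundary, followed by the `BlobCompressionContextHeader`. A minimal sketch of reading the header back out of such a file; the reader itself is an assumption, not part of this change:

```rust
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::mem::size_of;

use nydus_storage::meta::BlobCompressionContextHeader;

// The header occupies the tail of the file; everything before it is the
// 4K-aligned chunk-info array written by write_blob_meta().
fn read_cached_meta_header(path: &str) -> std::io::Result<Vec<u8>> {
    let mut file = File::open(path)?;
    let total = file.metadata()?.len();
    let header_size = size_of::<BlobCompressionContextHeader>() as u64;
    file.seek(SeekFrom::Start(total - header_size))?;
    let mut header = vec![0u8; header_size as usize];
    file.read_exact(&mut header)?;
    Ok(header)
}
```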

pub fn write_blob_data(
&self,
chunk_data: &[u8],
chunk_info: &ChunkWrapper,
aligned_d_size: u32,
) -> Result<()> {
let mut guard = self.blob_data.lock().unwrap();
let curr_pos = guard.seek(std::io::SeekFrom::End(0))?;
if curr_pos < chunk_info.uncompressed_offset() + aligned_d_size as u64 {
guard.set_len(chunk_info.uncompressed_offset() + aligned_d_size as u64)?;
}

guard.seek(std::io::SeekFrom::Start(chunk_info.uncompressed_offset()))?;
guard
.write_all(&chunk_data)
.context("failed to write blob cache")?;
Ok(())
}
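
The `.blob.data` cache file is the blob in uncompressed form: each chunk is written at its `uncompressed_offset`, growing the file when needed, so a chunk can later be served with a single positioned read. A small sketch of that read path under the same assumption (not part of this diff):

```rust
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};

// Fetch one chunk from the raw blob cache written by write_blob_data().
fn read_cached_chunk(
    file: &mut File,
    uncompressed_offset: u64,
    uncompressed_size: u32,
) -> std::io::Result<Vec<u8>> {
    let mut buf = vec![0u8; uncompressed_size as usize];
    file.seek(SeekFrom::Start(uncompressed_offset))?;
    file.read_exact(&mut buf)?;
    Ok(buf)
}
```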

pub fn finalize(&self, name: &str) -> Result<()> {
let blob_data_name = format!("{}.blob.data", name);
let mut guard = self.blob_data.lock().unwrap();
guard.finalize(Some(blob_data_name))?;
drop(guard);

let blob_meta_name = format!("{}.blob.meta", name);
let mut guard = self.blob_meta.lock().unwrap();
guard.finalize(Some(blob_meta_name))
}
}
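
`finalize()` flushes both writers and renames them to `<name>.blob.data` and `<name>.blob.meta` inside the `--blob-cache-dir` directory. Presumably the build driver invokes it once the final blob id is known, roughly as sketched below; the call site is outside this hunk and the function is illustrative:

```rust
fn finish_blob_cache(ctx: &BuildContext, blob_ctx: &BlobContext) -> Result<()> {
    if let Some(generator) = ctx.blob_cache_generator.as_ref() {
        // Produces "<blob_id>.blob.data" and "<blob_id>.blob.meta".
        generator.finalize(&blob_ctx.blob_id)?;
    }
    Ok(())
}
```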

/// BlobContext is used to hold the blob information of a layer during build.
pub struct BlobContext {
/// Blob id (user specified or sha256(blob)).
@@ -731,7 +823,7 @@ impl BlobContext {
}

/// Helper to write data to blob and update blob hash.
pub fn write_data(&mut self, blob_writer: &mut ArtifactWriter, data: &[u8]) -> Result<()> {
pub fn write_data(&mut self, blob_writer: &mut dyn Artifact, data: &[u8]) -> Result<()> {
blob_writer.write_all(data)?;
self.blob_hash.update(data);
Ok(())
@@ -740,11 +832,28 @@
/// Helper to write a tar header to blob and update blob hash.
pub fn write_tar_header(
&mut self,
blob_writer: &mut ArtifactWriter,
blob_writer: &mut dyn Artifact,
name: &str,
size: u64,
) -> Result<Header> {
let header = blob_writer.write_tar_header(name, size)?;
// The `inline-bootstrap` option merges the blob and bootstrap into one
// file. We need some header to index the location of the blob and bootstrap,
// write_tar_header uses tar header that arranges the data as follows:
// data | tar_header | data | tar_header
// This is a tar-like structure, except that we put the tar header after the
// data. The advantage is that we do not need to determine the size of the data
// first, so that we can write the blob data by stream without seek to improve
// the performance of the blob dump by using fifo.

let mut header = Header::new_gnu();
header.set_path(Path::new(name))?;
header.set_entry_type(EntryType::Regular);
header.set_size(size);
// The checksum must be set to ensure that the tar reader implementation
// in golang can correctly parse the header.
header.set_cksum();

blob_writer.write_all(header.as_bytes())?;
self.blob_hash.update(header.as_bytes());
Ok(header)
}
@@ -1182,6 +1291,8 @@ pub struct BuildContext {

pub features: Features,
pub configuration: Arc<ConfigV2>,
/// Generate the blob cache and blob meta
pub blob_cache_generator: Option<BlobCacheGenerator>,
}

impl BuildContext {
Expand Down Expand Up @@ -1221,7 +1332,6 @@ impl BuildContext {
} else {
crypt::Algorithm::None
};

BuildContext {
blob_id,
aligned_chunk,
@@ -1250,6 +1360,7 @@

features,
configuration: Arc::new(ConfigV2::default()),
blob_cache_generator: None,
}
}

@@ -1299,6 +1410,7 @@ impl Default for BuildContext {
blob_inline_meta: false,
features: Features::new(),
configuration: Arc::new(ConfigV2::default()),
blob_cache_generator: None,
}
}
}