Skip to content

Commit

Permalink
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Browse files Browse the repository at this point in the history
Allow IoVecBufferMut objects to store multiple DescriptorChain objects,
so that we can describe guest memory meant to be used for receiving data
(for example memory used for network RX) as a single (sparse) memory
region.

This will allow us to always keep track all the available memory we have
for performing RX and use `readv` for copying memory from the TAP device
inside guest memory avoiding the extra copy. In the future, it will also
facilitate the implementation of mergeable buffers for the RX path of
the network device.

Signed-off-by: Babis Chalios <bchalios@amazon.es>
  • Loading branch information
bchalios committed Sep 12, 2024
1 parent 02a261f commit f98af8a
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 43 deletions.
163 changes: 128 additions & 35 deletions src/vmm/src/devices/virtio/iovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use vm_memory::{
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
};

use super::iov_deque::{IovDeque, IovDequeError};
use crate::devices::virtio::queue::DescriptorChain;
use crate::vstate::memory::GuestMemoryMmap;

Expand All @@ -23,6 +24,8 @@ pub enum IoVecError {
OverflowedDescriptor,
/// Guest memory error: {0}
GuestMemory(#[from] GuestMemoryError),
/// Error with underlying `IovDeque`: {0}
IovDeque(#[from] IovDequeError),
}

// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
Expand Down Expand Up @@ -219,28 +222,24 @@ impl IoVecBuffer {
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
/// of data from that buffer.
#[derive(Debug, Default, Clone)]
pub struct IoVecBufferMut {
#[derive(Debug)]
pub struct IoVecBufferMut<'a> {
// container of the memory regions included in this IO vector
vecs: IoVecVec,
vecs: IovDeque<'a>,
// Total length of the IoVecBufferMut
len: u32,
len: usize,
}

impl IoVecBufferMut {
/// Create an `IoVecBuffer` from a `DescriptorChain`
///
/// # Safety
///
/// The descriptor chain cannot be referencing the same memory location as another chain
pub unsafe fn load_descriptor_chain(
impl<'a> IoVecBufferMut<'a> {
/// Parse a `DescriptorChain` object and append the memory regions it describes in the
/// underlying ring buffer.
fn parse_descriptor(
&mut self,
mem: &GuestMemoryMmap,
head: DescriptorChain,
) -> Result<(), IoVecError> {
self.clear();

) -> Result<u32, IoVecError> {
let mut next_descriptor = Some(head);
let mut length = 0u32;
while let Some(desc) = next_descriptor {
if !desc.is_write_only() {
return Err(IoVecError::ReadOnlyDescriptor);
Expand All @@ -257,18 +256,80 @@ impl IoVecBufferMut {
slice.bitmap().mark_dirty(0, desc.len as usize);

let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
self.vecs.push(iovec {
iov_base,
iov_len: desc.len as size_t,
});
self.len = self
.len
self.vecs
.push_back(iovec {
iov_base,
iov_len: desc.len as size_t,
})
.unwrap();
length = length
.checked_add(desc.len)
.ok_or(IoVecError::OverflowedDescriptor)?;

next_descriptor = desc.next_descriptor();
}

self.len = self
.len
.checked_add(length as usize)
.ok_or(IoVecError::OverflowedDescriptor)?;

Ok(length)
}

/// Create an empty `IoVecBufferMut`.
pub(crate) fn new() -> Result<Self, IovDequeError> {
let vecs = IovDeque::new()?;
Ok(Self { vecs, len: 0 })
}

/// Create an `IoVecBufferMut` from a `DescriptorChain`
///
/// This will clear any previous `iovec` objects in the buffer and load the new
/// [`DescriptorChain`].
///
/// # Safety
///
/// The descriptor chain cannot be referencing the same memory location as another chain
pub unsafe fn load_descriptor_chain(
&mut self,
mem: &GuestMemoryMmap,
head: DescriptorChain,
) -> Result<(), IoVecError> {
self.clear();
let _ = self.parse_descriptor(mem, head)?;
Ok(())
}

/// Append a `DescriptorChain` in this `IoVecBufferMut`
///
/// # Safety
///
/// The descriptor chain cannot be referencing the same memory location as another chain
pub unsafe fn append_descriptor_chain(
&mut self,
mem: &GuestMemoryMmap,
head: DescriptorChain,
) -> Result<u32, IoVecError> {
self.parse_descriptor(mem, head)
}

/// Drop memory from the `IoVecBufferMut`
///
/// This will drop memory described by the `IoVecBufferMut` starting from the beginning.
pub fn drop_iovecs(&mut self, size: u32) -> Result<(), IoVecError> {
let dropped = self.vecs.drop_iovs(size as usize);

// Users should ask us to drop a `size` of memory that is not exactly covered by `iovec`
// objects. In other words, the sum of the lengths of all dropped `iovec` objects should be
// equal to the `size` we were asked to drop. If it isn't, something is seriously wrong
// with the VirtIO queue or the emulation logic, so fail at this point.
assert_eq!(u32::try_from(dropped).unwrap(), size);
self.len = self
.len
.checked_sub(size as usize)
.ok_or(IoVecError::OverflowedDescriptor)?;

Check warning on line 331 in src/vmm/src/devices/virtio/iovec.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/devices/virtio/iovec.rs#L320-L331

Added lines #L320 - L331 were not covered by tests

Ok(())
}

Expand All @@ -281,20 +342,34 @@ impl IoVecBufferMut {
mem: &GuestMemoryMmap,
head: DescriptorChain,
) -> Result<Self, IoVecError> {
let mut new_buffer = Self::default();
let mut new_buffer = Self::new()?;
new_buffer.load_descriptor_chain(mem, head)?;
Ok(new_buffer)
}

/// Get the total length of the memory regions covered by this `IoVecBuffer`
pub(crate) fn len(&self) -> u32 {
///
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
/// the limit that the length of a buffer is limited by `u32`.
pub(crate) fn len(&self) -> usize {
self.len
}

/// Returns a pointer to the memory keeping the `iovec` structs
pub fn as_iovec_ptr(&mut self) -> *mut iovec {
self.vecs.as_mut_slice().as_mut_ptr()
}

Check warning on line 362 in src/vmm/src/devices/virtio/iovec.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/devices/virtio/iovec.rs#L360-L362

Added lines #L360 - L362 were not covered by tests

/// Returns the length of the `iovec` array.
pub fn iovec_count(&self) -> usize {
self.vecs.len()
}

Check warning on line 367 in src/vmm/src/devices/virtio/iovec.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/devices/virtio/iovec.rs#L365-L367

Added lines #L365 - L367 were not covered by tests

/// Clears the `iovec` array
pub fn clear(&mut self) {
self.vecs.clear();
self.len = 0u32;
self.len = 0;
}

/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
Expand All @@ -313,7 +388,7 @@ impl IoVecBufferMut {
mut buf: &[u8],
offset: usize,
) -> Result<(), VolatileMemoryError> {
if offset < self.len() as usize {
if offset < self.len() {
let expected = buf.len();
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;

Expand Down Expand Up @@ -342,7 +417,7 @@ impl IoVecBufferMut {
) -> Result<usize, VolatileMemoryError> {
let mut total_bytes_read = 0;

for iov in &self.vecs {
for iov in self.vecs.as_mut_slice() {
if len == 0 {
break;
}
Expand Down Expand Up @@ -391,6 +466,7 @@ mod tests {
use vm_memory::VolatileMemoryError;

use super::{IoVecBuffer, IoVecBufferMut};
use crate::devices::virtio::iov_deque::IovDeque;
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
use crate::devices::virtio::test_utils::VirtQueue;
use crate::utilities::test_utils::multi_region_mem;
Expand Down Expand Up @@ -427,15 +503,18 @@ mod tests {
}
}

impl From<&mut [u8]> for IoVecBufferMut {
impl<'a> From<&mut [u8]> for IoVecBufferMut<'a> {
fn from(buf: &mut [u8]) -> Self {
let mut vecs = IovDeque::new().unwrap();
vecs.push_back(iovec {
iov_base: buf.as_mut_ptr().cast::<c_void>(),
iov_len: buf.len(),
})
.unwrap();

Self {
vecs: vec![iovec {
iov_base: buf.as_mut_ptr().cast::<c_void>(),
iov_len: buf.len(),
}]
.into(),
len: buf.len().try_into().unwrap(),
vecs,
len: buf.len(),
}
}
}
Expand Down Expand Up @@ -528,8 +607,19 @@ mod tests {
let head = q.pop().unwrap();

// SAFETY: This descriptor chain is only loaded once in this test
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
assert_eq!(iovec.len(), 4 * 64);

// We are creating a new queue where we can get descriptors from. Probably, this is not
// something that we will ever want to do, as `IoVecBufferMut`s are typically
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
// the appending logic.
let (mut q, _) = write_only_chain(&mem);
let head = q.pop().unwrap();
// SAFETY: it is actually unsafe, but we just want to check the length of the
// `IoVecBufferMut` after appending.
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
assert_eq!(iovec.len(), 8 * 64);
}

#[test]
Expand Down Expand Up @@ -728,7 +818,7 @@ mod verification {
}
}

impl IoVecBufferMut {
impl IoVecBufferMut<'_> {
fn any_of_length(nr_descs: usize) -> Self {
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
// object initialized to zeroes, trying to be nice to Kani.
Expand All @@ -740,7 +830,10 @@ mod verification {
};

let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
Self { vecs, len }
Self {
vecs,
len: len.try_into().unwrap(),
}
}
}

Expand Down
9 changes: 6 additions & 3 deletions src/vmm/src/devices/virtio/rng/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ impl Entropy {
return Ok(0);
}

let mut rand_bytes = vec![0; iovec.len() as usize];
let mut rand_bytes = vec![0; iovec.len()];
rand::fill(&mut rand_bytes).map_err(|err| {
METRICS.host_rng_fails.inc();
err
})?;

// It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0.
iovec.write_all_volatile_at(&rand_bytes, 0).unwrap();
Ok(iovec.len())
Ok(u32::try_from(iovec.len()).unwrap())
}

fn process_entropy_queue(&mut self) {
Expand All @@ -145,7 +145,10 @@ impl Entropy {
// Check for available rate limiting budget.
// If not enough budget is available, leave the request descriptor in the queue
// to handle once we do have budget.
if !Self::rate_limit_request(&mut self.rate_limiter, u64::from(iovec.len())) {
if !Self::rate_limit_request(
&mut self.rate_limiter,
u64::try_from(iovec.len()).unwrap(),
) {
debug!("entropy: throttling entropy queue");
METRICS.entropy_rate_limiter_throttled.inc();
self.queues[RNG_QUEUE].undo_pop();
Expand Down
4 changes: 4 additions & 0 deletions src/vmm/src/devices/virtio/vsock/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK;
pub use self::defs::VSOCK_DEV_ID;
pub use self::device::Vsock;
pub use self::unix::{VsockUnixBackend, VsockUnixBackendError};
use super::iov_deque::IovDequeError;
use crate::devices::virtio::iovec::IoVecError;
use crate::devices::virtio::persist::PersistError as VirtioStateError;

Expand Down Expand Up @@ -138,6 +139,8 @@ pub enum VsockError {
VirtioState(VirtioStateError),
/// Vsock uds backend error: {0}
VsockUdsBackend(VsockUnixBackendError),
/// Underlying IovDeque error: {0}
IovDeque(IovDequeError),
}

impl From<IoVecError> for VsockError {
Expand All @@ -147,6 +150,7 @@ impl From<IoVecError> for VsockError {
IoVecError::ReadOnlyDescriptor => VsockError::UnwritableDescriptor,
IoVecError::GuestMemory(err) => VsockError::GuestMemoryMmap(err),
IoVecError::OverflowedDescriptor => VsockError::DescChainOverflow,
IoVecError::IovDeque(err) => VsockError::IovDeque(err),

Check warning on line 153 in src/vmm/src/devices/virtio/vsock/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/devices/virtio/vsock/mod.rs#L153

Added line #L153 was not covered by tests
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/vmm/src/devices/virtio/vsock/packet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub enum VsockPacketBuffer {
/// Buffer holds a read-only guest-to-host (TX) packet
Tx(IoVecBuffer),
/// Buffer holds a write-only host-to-guest (RX) packet
Rx(IoVecBufferMut),
Rx(IoVecBufferMut<'static>),
}

/// Struct describing a single vsock packet.
Expand Down Expand Up @@ -172,8 +172,8 @@ impl VsockPacket {
// are live at the same time, meaning this has exclusive ownership over the memory
let buffer = unsafe { IoVecBufferMut::from_descriptor_chain(mem, chain)? };

if buffer.len() < VSOCK_PKT_HDR_SIZE {
return Err(VsockError::DescChainTooShortForHeader(buffer.len() as usize));
if (u32::try_from(buffer.len()).unwrap()) < VSOCK_PKT_HDR_SIZE {
return Err(VsockError::DescChainTooShortForHeader(buffer.len()));
}

Ok(Self {
Expand Down Expand Up @@ -222,7 +222,7 @@ impl VsockPacket {
pub fn buf_size(&self) -> usize {
let chain_length = match self.buffer {
VsockPacketBuffer::Tx(ref iovec_buf) => iovec_buf.len(),
VsockPacketBuffer::Rx(ref iovec_buf) => iovec_buf.len(),
VsockPacketBuffer::Rx(ref iovec_buf) => iovec_buf.len().try_into().unwrap(),
};
(chain_length - VSOCK_PKT_HDR_SIZE) as usize
}
Expand All @@ -237,7 +237,7 @@ impl VsockPacket {
VsockPacketBuffer::Tx(_) => Err(VsockError::UnwritableDescriptor),
VsockPacketBuffer::Rx(ref mut buffer) => {
if count
> (buffer.len() as usize)
> (buffer.len())
.saturating_sub(VSOCK_PKT_HDR_SIZE as usize)
.saturating_sub(offset)
{
Expand Down

0 comments on commit f98af8a

Please sign in to comment.