Skip to content

Commit

Permalink
vhost: new device IOTLB API
Browse files Browse the repository at this point in the history
This patch tries to implement an device IOTLB for vhost. This could be
used with userspace(qemu) implementation of DMA remapping
to emulate an IOMMU for the guest.

The idea is simple, cache the translation in a software device IOTLB
(which is implemented as an interval tree) in vhost and use vhost_net
file descriptor for reporting IOTLB miss and IOTLB
update/invalidation. When vhost meets an IOTLB miss, the fault
address, size and access can be read from the file. After userspace
finishes the translation, it writes the translated address to the
vhost_net file to update the device IOTLB.

When device IOTLB is enabled by setting VIRTIO_F_IOMMU_PLATFORM all vq
addresses set by ioctl are treated as iova instead of virtual address and
the accessing can only be done through IOTLB instead of direct userspace
memory access. Before each round or vq processing, all vq metadata is
prefetched in device IOTLB to make sure no translation fault happens
during vq processing.

In most cases, virtqueues are contiguous even in virtual address space.
The IOTLB translation for virtqueue itself may make it a little
slower. We might add fast path cache on top of this patch.

Signed-off-by: Jason Wang <jasowang@redhat.com>
[mst: use virtio feature bit: VHOST_F_DEVICE_IOTLB -> VIRTIO_F_IOMMU_PLATFORM ]
[mst: fix build warnings ]
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
[ weiyj.lk: missing unlock on error ]
Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com>
  • Loading branch information
jasowang authored and mstsirkin committed Aug 2, 2016
1 parent b2fbd8b commit 6b1e6cc
Show file tree
Hide file tree
Showing 4 changed files with 705 additions and 50 deletions.
59 changes: 53 additions & 6 deletions drivers/vhost/net.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
enum {
VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF)
(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
};

enum {
Expand Down Expand Up @@ -308,7 +309,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
{
unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);

if (r == vq->num && vq->busyloop_timeout) {
preempt_disable();
Expand All @@ -318,7 +319,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
cpu_relax_lowlatency();
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
out_num, in_num, NULL, NULL);
}

return r;
Expand Down Expand Up @@ -351,6 +352,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock)
goto out;

if (!vq_iotlb_prefetch(vq))
goto out;

vhost_disable_notify(&net->dev, vq);

hdr_size = nvq->vhost_hlen;
Expand Down Expand Up @@ -612,6 +616,10 @@ static void handle_rx(struct vhost_net *net)
sock = vq->private_data;
if (!sock)
goto out;

if (!vq_iotlb_prefetch(vq))
goto out;

vhost_disable_notify(&net->dev, vq);

vhost_hlen = nvq->vhost_hlen;
Expand Down Expand Up @@ -1080,10 +1088,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
!vhost_log_access_ok(&n->dev)) {
mutex_unlock(&n->dev.mutex);
return -EFAULT;
!vhost_log_access_ok(&n->dev))
goto out_unlock;

if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
if (vhost_init_device_iotlb(&n->dev, true))
goto out_unlock;
}

for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
Expand All @@ -1093,6 +1105,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
}
mutex_unlock(&n->dev.mutex);
return 0;

out_unlock:
mutex_unlock(&n->dev.mutex);
return -EFAULT;
}

static long vhost_net_set_owner(struct vhost_net *n)
Expand Down Expand Up @@ -1166,9 +1182,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
}
#endif

static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;
int noblock = file->f_flags & O_NONBLOCK;

return vhost_chr_read_iter(dev, to, noblock);
}

static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;

return vhost_chr_write_iter(dev, from);
}

static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
{
struct vhost_net *n = file->private_data;
struct vhost_dev *dev = &n->dev;

return vhost_chr_poll(file, dev, wait);
}

static const struct file_operations vhost_net_fops = {
.owner = THIS_MODULE,
.release = vhost_net_release,
.read_iter = vhost_net_chr_read_iter,
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vhost_net_compat_ioctl,
Expand Down
Loading

0 comments on commit 6b1e6cc

Please sign in to comment.