diff --git a/src/apps/basic/basic_apps.lua b/src/apps/basic/basic_apps.lua index 0e581fbee3..4964894ccc 100644 --- a/src/apps/basic/basic_apps.lua +++ b/src/apps/basic/basic_apps.lua @@ -2,6 +2,7 @@ module(...,package.seeall) local app = require("core.app") local buffer = require("core.buffer") +local freelist = require("core.freelist") local packet = require("core.packet") local link = require("core.link") @@ -24,15 +25,32 @@ end Source = setmetatable({zone = "Source"}, {__index = Basic}) -function Source:new() - return setmetatable({}, {__index=Source}) +function Source:new(size) + return setmetatable({size=tonumber(size) or 60}, {__index=Source}) +end + +-- Allocate receive buffers from the given freelist. +function Source:set_rx_buffer_freelist (fl) + assert(fl) + self.rx_buffer_freelist = fl end function Source:pull () + local fl = self.rx_buffer_freelist for _, o in ipairs(self.outputi) do for i = 1, link.nwritable(o) do + local b = nil + if fl then + if freelist.nfree(fl) > 0 then + b = freelist.remove(fl) + else + return + end + else + b = buffer.allocate() + end local p = packet.allocate() - packet.add_iovec(p, buffer.allocate(), 60) + packet.add_iovec(p, b, self.size) link.transmit(o, p) end end diff --git a/src/apps/vhost/vhost_user.c b/src/apps/vhost/vhost_user.c index 5a43b02b19..2a12ac20ee 100644 --- a/src/apps/vhost/vhost_user.c +++ b/src/apps/vhost/vhost_user.c @@ -12,129 +12,142 @@ #include "vhost.h" #include "vhost_user.h" -#define VHOST_USER_MSG_SIZE 12 - -int vhost_user_connect(const char *path) { - int sock; - struct sockaddr_un un; - - if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { - perror("socket"); - return -1; - } - - un.sun_family = AF_UNIX; - strncpy(un.sun_path, path, sizeof(un.sun_path)); - - if (connect(sock, (struct sockaddr *) &un, sizeof(un)) == -1) { - close(sock); - return -1; - } - - return sock; +#define MEMB_SIZE(t,m) (sizeof(((t*)0)->m)) +#define VHOST_USER_HDR_SIZE (MEMB_SIZE(struct vhost_user_msg,request) \ + + 
MEMB_SIZE(struct vhost_user_msg,flags) \ + + MEMB_SIZE(struct vhost_user_msg,size)) + +int vhost_user_connect(const char *path) +{ + int sock; + struct sockaddr_un un = { 0 }; + + if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + perror("socket"); + return -1; + } + + un.sun_family = AF_UNIX; + strncpy(un.sun_path, path, sizeof(un.sun_path) - 1); + + if (connect(sock, (struct sockaddr *) &un, sizeof(un)) == -1) { + close(sock); + return -1; + } + + return sock; } -int vhost_user_accept(int sock) { - int newsock; - if ((newsock = accept(sock, NULL, NULL)) == -1) { - assert(errno == EAGAIN); - } else { - assert(fcntl(newsock, F_SETFL, O_NONBLOCK) == 0); - } - return newsock; +int vhost_user_accept(int sock) +{ + int newsock; + if ((newsock = accept(sock, NULL, NULL)) == -1) { + assert(errno == EAGAIN); + } else { + assert(fcntl(newsock, F_SETFL, O_NONBLOCK) == 0); + } + return newsock; } -int vhost_user_send(int sock, struct vhost_user_msg *msg) { - int ret; +int vhost_user_send(int sock, struct vhost_user_msg *msg) +{ + int ret; - struct msghdr msgh; - struct iovec iov[1]; + struct msghdr msgh; + struct iovec iov[1]; - memset(&msgh, 0, sizeof(msgh)); + memset(&msgh, 0, sizeof(msgh)); - iov[0].iov_base = (void *)msg; - iov[0].iov_len = VHOST_USER_MSG_SIZE + msg->size; + iov[0].iov_base = (void *) msg; + iov[0].iov_len = VHOST_USER_HDR_SIZE + msg->size; - msgh.msg_iov = iov; - msgh.msg_iovlen = 1; + msgh.msg_iov = iov; + msgh.msg_iovlen = 1; - msgh.msg_control = 0; - msgh.msg_controllen = 0; + msgh.msg_control = 0; + msgh.msg_controllen = 0; - printf("vhost_user_send %d %d %d %d\n", msg->request, msg->flags, msg->size, (int)iov[0].iov_len); + printf("vhost_user_send %d %d %d %d\n", msg->request, msg->flags, msg->size, + (int) iov[0].iov_len); - do { - ret = sendmsg(sock, &msgh, 0); - } while (ret < 0 && errno == EINTR); + do { + ret = sendmsg(sock, &msgh, 0); + } while (ret < 0 && errno == EINTR); - if (ret < 0) { - perror("sendmsg"); - } + if (ret < 0) { +
perror("sendmsg"); + } - return ret; + return ret; } -int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, int *nfds) { - struct msghdr msgh; - struct iovec iov[1]; - int ret; - - int fd_size = sizeof(int) * VHOST_USER_MEMORY_MAX_NREGIONS; - char control[CMSG_SPACE(fd_size)]; - struct cmsghdr *cmsg; - - memset(&msgh, 0, sizeof(msgh)); - memset(control, 0, sizeof(control)); - *nfds = 0; - - iov[0].iov_base = (void *) msg; - iov[0].iov_len = VHOST_USER_MSG_SIZE; - - msgh.msg_iov = iov; - msgh.msg_iovlen = 1; - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - do { - ret = recvmsg(sock, &msgh, MSG_DONTWAIT | MSG_WAITALL); - } while (ret < 0 && errno == EINTR); - if (ret == VHOST_USER_MSG_SIZE) { - if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { - ret = -1; - } else { - // Copy file descriptors - cmsg = CMSG_FIRSTHDR(&msgh); - if (cmsg && cmsg->cmsg_len > 0&& - cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_RIGHTS) { - if (fd_size >= cmsg->cmsg_len - CMSG_LEN(0)) { - fd_size = cmsg->cmsg_len - CMSG_LEN(0); - memcpy(fds, CMSG_DATA(cmsg), fd_size); - *nfds = fd_size / sizeof(int); +int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, + int *nfds) +{ + struct msghdr msgh; + struct iovec iov[1]; + int ret; + + int fd_size = sizeof(int) * VHOST_USER_MEMORY_MAX_NREGIONS; + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + *nfds = 0; + + iov[0].iov_base = (void *) msg; + iov[0].iov_len = VHOST_USER_HDR_SIZE; + + msgh.msg_iov = iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + do { + ret = recvmsg(sock, &msgh, MSG_DONTWAIT | MSG_WAITALL); + } while (ret < 0 && errno == EINTR); + if (ret == VHOST_USER_HDR_SIZE) { + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + ret = -1; + } else { + // Copy file descriptors + cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg && cmsg->cmsg_len 
> 0&& + cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + if (fd_size >= cmsg->cmsg_len - CMSG_LEN(0)) { + fd_size = cmsg->cmsg_len - CMSG_LEN(0); + memcpy(fds, CMSG_DATA(cmsg), fd_size); + *nfds = fd_size / sizeof(int); + } + } + if (msg->size > 0) { + do { + ret = read(sock, ((char*) msg) + VHOST_USER_HDR_SIZE, + msg->size); + } while (ret < 0 && errno == EINTR); + } } - } - if (msg->size > 0) { - do { - ret = read(sock, ((char*)msg)+VHOST_USER_MSG_SIZE, msg->size); - } while (ret < 0 && errno == EINTR); - } } - } - if (ret < 0 && errno != EAGAIN) { - perror("recvmsg"); - } - return ret; + if (ret < 0 && errno != EAGAIN) { + perror("recvmsg"); + } + return ret; } -void* vhost_user_map_guest_memory(int fd, int size) { - void *ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - return ptr == MAP_FAILED ? 0 : ptr; +void* vhost_user_map_guest_memory(int fd, int size) +{ + void *ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + return ptr == MAP_FAILED ? 
0 : ptr; } -int vhost_user_unmap_guest_memory(void *ptr, int size) { - return munmap(ptr, size); +int vhost_user_unmap_guest_memory(void *ptr, int size) +{ + return munmap(ptr, size); } -int vhost_user_sync_shm(void *ptr, size_t size) { - return msync(ptr, size, MS_SYNC | MS_INVALIDATE); +int vhost_user_sync_shm(void *ptr, size_t size) +{ + return msync(ptr, size, MS_SYNC | MS_INVALIDATE); } diff --git a/src/apps/vhost/vhost_user.h b/src/apps/vhost/vhost_user.h index 846f5caf55..c3f9851dce 100644 --- a/src/apps/vhost/vhost_user.h +++ b/src/apps/vhost/vhost_user.h @@ -1,4 +1,6 @@ -enum { VHOST_USER_MEMORY_MAX_NREGIONS = 8 }; +enum { + VHOST_USER_MEMORY_MAX_NREGIONS = 8 +}; // vhost_user request types enum { @@ -17,40 +19,45 @@ enum { VHOST_USER_SET_VRING_KICK = 12, VHOST_USER_SET_VRING_CALL = 13, VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_NET_SET_BACKEND = 15, - VHOST_USER_ECHO = 16, VHOST_USER_MAX }; struct vhost_user_memory_region { - uint64_t guest_phys_addr; - uint64_t memory_size; - uint64_t userspace_addr; + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; }; struct vhost_user_memory { - uint32_t nregions; - uint32_t padding; - struct vhost_user_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS]; + uint32_t nregions; + uint32_t padding; + struct vhost_user_memory_region regions[VHOST_USER_MEMORY_MAX_NREGIONS]; }; -struct vhost_user_msg { - int request; - uint32_t flags; - uint32_t size; - union { - uint64_t u64; - // defined in vhost.h - struct vhost_vring_state state; - struct vhost_vring_addr addr; - struct vhost_user_memory memory; - }; -} __attribute__((packed)); +enum { + VHOST_USER_VERSION_MASK = (0x3), + VHOST_USER_REPLY_MASK = (0x1 << 2), + VHOST_USER_VRING_IDX_MASK = (0xff), + VHOST_USER_VRING_NOFD_MASK = (0x1 << 8) +}; +struct vhost_user_msg { + int request; + uint32_t flags; + uint32_t size; + union { + uint64_t u64; + // defined in vhost.h + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct 
vhost_user_memory memory; + }; +}__attribute__((packed)); int vhost_user_connect(const char *path); int vhost_user_send(int sock, struct vhost_user_msg *msg); -int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, int *nfds); +int vhost_user_receive(int sock, struct vhost_user_msg *msg, int *fds, + int *nfds); void* vhost_user_map_guest_memory(int fd, int size); int vhost_user_unmap_guest_memory(void *ptr, int size); int vhost_user_sync_shm(void *ptr, size_t size); diff --git a/src/apps/vhost/vhost_user.lua b/src/apps/vhost/vhost_user.lua index cb388ac3b9..76d336c898 100644 --- a/src/apps/vhost/vhost_user.lua +++ b/src/apps/vhost/vhost_user.lua @@ -1,28 +1,21 @@ --- The VhostUser app implements I/O to a QEMU/KVM Virtio-net interface. -- -- See http://www.virtualopensystems.com/en/solutions/guides/snabbswitch-qemu/ module(...,package.seeall) -local app = require("core.app") -local link = require("core.link") -local config = require("core.config") local basic_apps= require("apps.basic.basic_apps") -local buffer = require("core.buffer") -local ffi = require("ffi") -local freelist = require("core.freelist") -local intel_app = require("apps.intel.intel_app") -local lib = require("core.lib") -local packet = require("core.packet") local pcap = require("apps.pcap.pcap") +local app = require("core.app") +local buffer = require("core.buffer") +local config = require("core.config") +local link = require("core.link") +local main = require("core.main") local pci = require("lib.hardware.pci") -local register = require("lib.hardware.register") +local net_device= require("lib.virtio.net_device") local timer = require("core.timer") -local vfio = require("lib.hardware.vfio") +local ffi = require("ffi") local C = ffi.C -require("lib.virtio.virtio.h") -require("lib.virtio.virtio_vring_h") require("apps.vhost.vhost_h") require("apps.vhost.vhost_user_h") @@ -32,18 +25,17 @@ VhostUser = {} function VhostUser:new (socket_path) local o = { state = 'init', - msg = 
ffi.new("struct vhost_user_msg"), - nfds = ffi.new("int[1]"), - fds = ffi.new("int[?]", C.VHOST_USER_MEMORY_MAX_NREGIONS), - socket_path = socket_path, - callfd = {}, - kickfd = {}, - -- buffer records that are not currently in use - buffer_recs = freelist.new("struct buffer *", 32*1024), - -- buffer records populated with available VM memory - vring_transmit_buffers = freelist.new("struct buffer *", 32*1024) - } - return setmetatable(o, {__index = VhostUser}) + dev = nil, + msg = ffi.new("struct vhost_user_msg"), + nfds = ffi.new("int[1]"), + fds = ffi.new("int[?]", C.VHOST_USER_MEMORY_MAX_NREGIONS), + socket_path = socket_path, + -- process qemu messages delay counter + process_qemu_counter = 0 + } + self = setmetatable(o, {__index = VhostUser}) + self.dev = net_device.VirtioNetDevice:new(self) + return self end function VhostUser:pull () @@ -51,7 +43,9 @@ function VhostUser:pull () self:connect() else self:process_qemu_requests() - self:poll_vring_packets() + if self.vhost_ready then + self.dev:poll_vring_packets() + end end end @@ -64,184 +58,6 @@ function VhostUser:connect () end end -function VhostUser:poll_vring_packets () - if self.vhost_ready then - self:receive_packets_from_vm() - self:get_transmit_buffers_from_vm() - self:transmit_packets_to_vm() - end -end - --- Receive all available packets from the virtual machine. -function VhostUser:receive_packets_from_vm () - assert(self.connected) - while self.rxavail ~= self.rxring.avail.idx do - local descriptor_id = self.rxring.avail.ring[self.rxavail % self.rx_vring_num] - local p = packet.allocate() - local need_header = true - local head_idx = nil - repeat - debug("received packet idx = " .. 
tostring(descriptor_id)) - local descriptor = self.rxring.desc[descriptor_id] - local guest_addr = descriptor.addr - local snabb_addr = map_from_guest(guest_addr, self.mem_table) - local pointer = ptr(snabb_addr) - local len = descriptor.len - -- This code is not sufficiently general, but here is how it - -- works: Linux virtio-net driver is sending packets with the - -- first buffer (the "head") containing only metadata. We - -- copy this data into the packet struct (and exclude it from - -- our iovecs). - -- - -- To return these buffers to Linux it seems that they expect - -- us to place the first buffer (containing metadata) onto - -- the 'used' ring and they will rely on chaining with the - -- NEXT flag to find and reclaim all the buffers for this - -- packet. - -- - -- I'm a little surprised/confused/annoyed that buffers - -- aren't returned individually, which would seem to make - -- life simpler. I may be missing something though. -luke - if need_header then - if len ~= ffi.sizeof("struct virtio_net_hdr_mrg_rxbuf") then - error("NYI packet from VM not according to assumptions. length = " .. tostring(len)) - end - assert(bit.band(descriptor.flags, C.VIO_DESC_F_NEXT) ~= 0) - ffi.copy(p.info, pointer, ffi.sizeof("struct packet_info")) - need_header = false - head_idx = descriptor_id - else - local b = freelist.remove(self.buffer_recs) or lib.malloc("struct buffer") - b.pointer = pointer - b.physical = snabb_addr - b.size = len - -- Tag the first buffer with the head index that needs to be freed. - -- - -- This is not optimal: freeing this one buffer will cause - -- all buffers in the chain to be reused by qemu, so bad - -- things will happen unless we free all virtio packet - -- buffers at the same time. Have to develop a more - -- complete understanding of Virtio and then handle this - -- in a good way. 
- if head_idx then - b.origin.type = C.BUFFER_ORIGIN_VIRTIO - b.origin.info.virtio.device_id = self.virtio_device_id - b.origin.info.virtio.ring_id = 1 -- rxring - b.origin.info.virtio.descriptor_id = head_idx - head_idx = nil - end - packet.add_iovec(p, b, len) - end - descriptor_id = descriptor.next - until bit.band(descriptor.flags, C.VIO_DESC_F_NEXT) == 0 - self.rxavail = (self.rxavail + 1) % 65536 - if self.output.tx then - link.transmit(self.output.tx, p) - else - debug("droprx", "len", p.length, "niovecs", p.niovecs) - packet.deref(p) - end - end -end - --- Populate the `self.vring_transmit_buffers` freelist with buffers from the VM. -function VhostUser:get_transmit_buffers_from_vm () - while self.txavail ~= self.txring.avail.idx do - -- Extract the first buffer and any that are chained to it via NEXT flag - local index = self.txring.avail.ring[self.txavail % self.tx_vring_num] - repeat - local desc = self.txring.desc[index] - local b = freelist.remove(self.buffer_recs) or lib.malloc("struct buffer") - -- XXX The "headroom" optimization: - -- We assume that the buffer will later be prepended with a - -- virtio_net_hdr_mrg_rxbuf struct for transmit to the VM. We - -- reserve space for this header at the start of the packet, - -- before buffer.pointer, so that we can fill in the header - -- later without having to copy/move the rest of the packet - -- data. - -- - -- This assumption breaks down for multi-buffer packets. - -- Haven't worked out how to avoid copies there yet. - local headroom = ffi.sizeof("struct virtio_net_hdr_mrg_rxbuf") - local addr = map_from_guest(desc.addr, self.mem_table) + headroom - b.pointer = ffi.cast("char*", addr) - -- XXX Set physical address to virtual address. This is - -- broken unless we are using vfio and setting up a 1:1 - -- mapping between virtual and IO addresses (so we have to - -- make sure that we do that, for now.) 
- b.physical = ffi.cast("uint64_t", addr) - b.size = desc.len - headroom - -- Track the origin of this buffer so we can do zero-copy tricks. - b.origin.type = C.BUFFER_ORIGIN_VIRTIO - b.origin.info.virtio.device_id = self.virtio_device_id - b.origin.info.virtio.ring_id = 0 -- tx ring - b.origin.info.virtio.descriptor_id = index - debug("added buffer", "size", desc.len, "flags", desc.flags, "count", freelist.nfree(self.vring_transmit_buffers)) - freelist.add(self.vring_transmit_buffers, b) - self.txavail = (self.txavail + 1) % 65536 - -- Continue to traverse the descriptor chain - index = desc.next - until bit.band(desc.flags, C.VIRTIO_DESC_F_NEXT) == 0 - end -end - --- Prepared argument for writing a 1 to an eventfd. -eventfd_one = ffi.new("uint64_t[1]", {1}) - --- Transmit packets from the app input queue to the VM. -function VhostUser:transmit_packets_to_vm () - local notify_needed = false - local l = self.input.rx - if l == nil then return end - while not link.empty(l) and (self.txring.used.idx + 1) % 65536 ~= self.txused do - local p = link.receive(l) - assert(p.niovecs == 1, "NYI: multi-buffer packets") - local iovec = p.iovecs[0] - if self:can_zerocopy_transmit(iovec) then - local b = iovec.buffer - assert(b.origin.type == C.BUFFER_ORIGIN_VIRTIO) - local headroom = ffi.sizeof("struct virtio_net_hdr_mrg_rxbuf") - local start = b.pointer - headroom - local mrg = ffi.cast("struct virtio_net_hdr_mrg_rxbuf &", start) - ffi.copy(mrg, p.info, ffi.sizeof(p.info)) - mrg.num_buffers = 1 - local used = self.txring.used.ring[self.txused] - used.id = b.origin.info.virtio.descriptor_id - used.len = iovec.length + headroom - self.txused = (self.txused + 1) % 65536 - else - assert(false, "NYI packet copy into vring required") - end - notify_needed = true - end - if notify_needed then - debug("notify needed") - self.txring.used.idx = self.txused - C.write(self.callfd[0], eventfd_one, 8) - end -end - -function VhostUser:can_zerocopy_transmit (iovec) - return 
iovec.buffer.origin.type == C.BUFFER_ORIGIN_VIRTIO - and iovec.buffer.origin.info.virtio.device_id == self.virtio_device_id - and iovec.buffer.origin.info.virtio.ring_id == 0 - and iovec.offset == 0 -end - --- Return a buffer to the virtual machine. -function VhostUser:return_virtio_buffer (b) - assert(b.origin.info.virtio.device_id == self.virtio_device_id) - if b.origin.info.virtio.ring_id == 1 then -- Receive buffer? - local used = self.rxring.used.ring[self.rxring.used.idx % self.rx_vring_num] - used.id = b.origin.info.virtio.descriptor_id - used.len = b.size - debug("Returned buffer", used.id, self.rxring.used.idx) - self.rxring.used.idx = (self.rxring.used.idx + 1) % 65536 - -- XXX Call at most once per pull() - C.write(self.callfd[1], eventfd_one, 8) - end -end - -- vhost_user protocol request handlers. -- Table of request code -> name of handler method @@ -260,106 +76,125 @@ handler_names = { [C.VHOST_USER_GET_VRING_BASE] = 'get_vring_base', [C.VHOST_USER_SET_VRING_KICK] = 'set_vring_kick', [C.VHOST_USER_SET_VRING_CALL] = 'set_vring_call', - [C.VHOST_USER_SET_VRING_ERR] = 'set_vring_err', - [C.VHOST_USER_NET_SET_BACKEND] = 'net_set_backend', - [C.VHOST_USER_ECHO] = 'echo' + [C.VHOST_USER_SET_VRING_ERR] = 'set_vring_err' } -- Process all vhost_user requests from QEMU. 
function VhostUser:process_qemu_requests () + self.process_qemu_counter = self.process_qemu_counter + 1 + if self.vhost_ready and self.process_qemu_counter % 1000000 ~= 0 then return end + local msg = self.msg - while C.vhost_user_receive(self.socket, msg, self.fds, self.nfds) >= 0 do - assert(msg.request >= 0 and msg.request <= C.VHOST_USER_MAX) - debug("Got vhost_user request", handler_names[msg.request], msg.request) - local method = self[handler_names[msg.request]] - if method then - method(self, msg, self.fds, self.nfds[0]) + local stop = false + + repeat + local ret = C.vhost_user_receive(self.socket, msg, self.fds, self.nfds) + + if ret > 0 then + assert(msg.request >= 0 and msg.request <= C.VHOST_USER_MAX) + debug("Got vhost_user request", handler_names[msg.request], msg.request) + local method = self[handler_names[msg.request]] + if method then + method(self, msg, self.fds, self.nfds[0]) + else + error(string.format("vhost_user unrecognized request: %d", msg.request)) + end + msg.request = -1; else - error(string.format("vhost_user unrecognized request: %d", msg.request)) + stop = true + if ret == 0 then + print ("Connection went down") + self.socket = -1 + self.connected = false + self.vhost_ready = false + if self.link_down_proc then self.link_down_proc() end + end end - end + until stop + end function VhostUser:none (msg) + error(string.format("vhost_user unrecognized request: %d", msg.request)) end function VhostUser:get_features (msg) - msg.size = 8 - msg.u64 = C.VIRTIO_NET_F_MRG_RXBUF + C.VIRTIO_NET_F_CSUM + msg.u64 = self.dev:get_features() + msg.size = ffi.sizeof("uint64_t") -- In future add TSO4/TSO6/UFO/ECN and control channel self:reply(msg) end function VhostUser:set_features (msg) - debug("features = " .. 
tostring(msg.u64)) + self.dev:set_features(msg.u64) end function VhostUser:set_owner (msg) + debug("set_owner") +end + +function VhostUser:reset_owner (msg) + debug("reset_owner") end function VhostUser:set_vring_num (msg) - local n = tonumber(msg.state.num) - if msg.state.index == 0 then - self.tx_vring_num = n - else - self.rx_vring_num = n - end + self.dev:set_vring_num(msg.state.index, msg.state.num) end function VhostUser:set_vring_call (msg, fds, nfds) - assert(nfds == 1) - local idx = tonumber(msg.u64) - self.callfd[idx] = fds[0] + local idx = tonumber(bit.band(msg.u64, C.VHOST_USER_VRING_IDX_MASK)) + local validfd = bit.band(msg.u64, C.VHOST_USER_VRING_NOFD_MASK) == 0 + + assert(idx<42) + if validfd then + assert(nfds == 1) + self.dev:set_vring_call(idx, fds[0]) + end end function VhostUser:set_vring_kick (msg, fds, nfds) - local idx = tonumber(msg.u64) + local idx = tonumber(bit.band(msg.u64, C.VHOST_USER_VRING_IDX_MASK)) + local validfd = bit.band(msg.u64, C.VHOST_USER_VRING_NOFD_MASK) == 0 + assert(idx < 42) - if nfds == 1 then - self.kickfd[idx] = fds[0] + if validfd then + assert(nfds == 1) + self.dev:set_vring_kick(idx, fds[0]) + else + print("Should start polling on virtq "..tostring(idx)) end end function VhostUser:set_vring_addr (msg) - local desc = map_from_qemu(msg.addr.desc_user_addr, self.mem_table) - local used = map_from_qemu(msg.addr.used_user_addr, self.mem_table) - local avail = map_from_qemu(msg.addr.avail_user_addr, self.mem_table) + local desc = self.dev:map_from_qemu(msg.addr.desc_user_addr) + local used = self.dev:map_from_qemu(msg.addr.used_user_addr) + local avail = self.dev:map_from_qemu(msg.addr.avail_user_addr) local ring = { desc = ffi.cast("struct vring_desc *", desc), - used = ffi.cast("struct vring_used &", used), - avail = ffi.cast("struct vring_avail &", avail) } - if msg.addr.index == 0 then - self.txring = ring - self.txused = ring.used.idx - else - self.rxring = ring - self.rxused = ring.used.idx - end - if self.rxring and
self.txring then + used = ffi.cast("struct vring_used *", used), + avail = ffi.cast("struct vring_avail *", avail) } + + self.dev:set_vring_addr(msg.addr.index, ring) + + if self.dev:ready() then self.vhost_ready = true - self.virtio_device_id = buffer.add_virtio_device(self) + self.dev:set_virtio_device_id(buffer.add_virtio_device(self.dev)) debug("Connected and initialized vhost_user.") end end function VhostUser:set_vring_base (msg) debug("set_vring_base", msg.state.index, msg.state.num) - if msg.state.index == 0 then - self.txavail = msg.state.num - else - self.rxavail = msg.state.num - end + self.dev:set_vring_base(msg.state.index, msg.state.num) end function VhostUser:get_vring_base (msg) - msg.size = 8 - local n - if msg.state.index == 0 then n = self.txavail else n = self.rxavail end - msg.u64 = n or 0 + msg.state.num = self.dev:get_vring_base(msg.state.index) + msg.size = ffi.sizeof("struct vhost_vring_state") self:reply(msg) end function VhostUser:set_mem_table (msg, fds, nfds) - self.mem_table = {} + local mem_table = {} assert(nfds == msg.memory.nregions) for i = 0, msg.memory.nregions - 1 do assert(fds[i] > 0) -- XXX vapp_server.c uses 'if' @@ -369,16 +204,12 @@ function VhostUser:set_mem_table (msg, fds, nfds) C.mmap_memory(pointer, size, ffi.cast("uint64_t",pointer), true, true) local guest = msg.memory.regions[i].guest_phys_addr local qemu = msg.memory.regions[i].userspace_addr - -- register with vfio - table.insert(self.mem_table, { guest = guest, - qemu = qemu, - snabb = ffi.cast("int64_t", pointer), - size = tonumber(size) }) + table.insert(mem_table, { guest = guest, + qemu = qemu, + snabb = ffi.cast("int64_t", pointer), + size = tonumber(size) }) end -end - -function VhostUser:echo (msg) - self:reply(msg) + self.dev:set_mem_table(mem_table) end function VhostUser:reply (req) @@ -387,105 +218,67 @@ function VhostUser:reply (req) C.vhost_user_send(self.socket, req) end --- Address space remapping.
- -function map_to_guest (addr, mem_table) - for _,m in ipairs(mem_table) do - if addr >= m.snabb and addr < m.snabb + m.size then - return addr + m.guest - m.snabb - end - end - error("mapping to guest address failed") -end - -function map_from_guest (addr, mem_table) - for _,m in ipairs(mem_table) do - if addr >= m.guest and addr < m.guest + m.size then - return addr + m.snabb - m.guest - end - end - error("mapping to host address failed" .. tostring(ffi.cast("void*",addr))) +function VhostUser:report() + self.dev:report() end -function map_from_qemu (addr, mem_table) - for _,m in ipairs(mem_table) do - if addr >= m.qemu and addr < m.qemu + m.size then - return addr + m.snabb - m.qemu - end - end - error("mapping to host address failed" .. tostring(ffi.cast("void*",addr))) +function VhostUser:rx_buffers() + return self.dev:rx_buffers() end function selftest () print("selftest: vhost_user") - if not vfio.is_vfio_available() then - print("VFIO not available\nTest skipped") - os.exit(app.test_skipped_code) - end -- Create an app network that proxies packets between a vhost_user - -- port (qemu) and an Intel port (in loopback mode). Create - -- separate pcap traces for packets received from vhost and intel. - -- + -- port (qemu) and a sink. Create + -- separate pcap traces for packets received from vhost. 
+ -- -- schema for traffic from the VM: - -- - -- vhost -> tee -> intel + -- + -- vhost -> tee -> sink -- | -- v -- vhost pcap - -- - -- schema for traffic from the intel NIC: - -- vhost <- tee <- intel - -- | - -- v - -- intel pcap - -- - local pciid = os.getenv("SNABB_TEST_INTEL10G_PCI_ID") - if not pciid then - print("SNABB_TEST_INTEL10G_PCI_ID was not set\nTest skipped") - os.exit(app.test_skipped_code) - end + -- local vhost_user_sock = os.getenv("SNABB_TEST_VHOST_USER_SOCKET") if not vhost_user_sock then print("SNABB_TEST_VHOST_USER_SOCKET was not set\nTest skipped") os.exit(app.test_skipped_code) end - - pci.unbind_device_from_linux(pciid) - vfio.setup_vfio(pciid) - vfio.bind_device_to_vfio(pciid) local c = config.new() config.app(c, "vhost_user", VhostUser, vhost_user_sock) - config.app(c, "vhost_dump", pcap.PcapWriter, "vhost_vm_dump.cap") + --config.app(c, "vhost_dump", pcap.PcapWriter, "vhost_vm_dump.cap") config.app(c, "vhost_tee", basic_apps.Tee) - config.app(c, "intel", intel_app.Intel82599, pciid) - config.app(c, "intel_dump", pcap.PcapWriter, "vhost_nic_dump.cap") - config.app(c, "intel_tee", basic_apps.Tee) + config.app(c, "sink", basic_apps.Sink) + config.app(c, "source", basic_apps.Source, "250") + config.app(c, "source_tee", basic_apps.Tee) + config.link(c, "vhost_user.tx -> vhost_tee.input") - config.link(c, "vhost_tee.dump -> vhost_dump.input") - config.link(c, "vhost_tee.traffic -> intel.rx") - config.link(c, "intel.tx -> intel_tee.input") - config.link(c, "intel_tee.dump -> intel_dump.input") - config.link(c, "intel_tee.traffic -> vhost_user.rx") + --config.link(c, "vhost_tee.dump -> vhost_dump.input") + config.link(c, "vhost_tee.traffic -> sink.in") + + config.link(c, "source.tx -> source_tee.input") + config.link(c, "source_tee.traffic -> vhost_user.rx") + app.configure(c) local vhost_user = app.app_table.vhost_user - local intel = app.app_table.intel - intel:set_rx_buffer_freelist(vhost_user.vring_transmit_buffers) + 
vhost_user.link_down_proc = function() + main.exit(0) + end + local source = app.app_table.source + source:set_rx_buffer_freelist(vhost_user:rx_buffers()) + local fn = function () - local vu = app.apps.vhost_user - app.report() - if vhost_user.vhost_ready then - debug("txavail", vhost_user.txring.avail.idx, - "txused", vhost_user.txring.used.idx, - "rxavail", vhost_user.rxring.avail.idx, - "rxused", vhost_user.rxring.used.idx) - end - end + local vu = app.apps.vhost_user + app.report() + if vhost_user.vhost_ready then + vhost_user:report() + end + end timer.init() - timer.activate(timer.new("report", fn, 1e9, 'repeating')) - -- For interactive testing, uncommon this line to run without time limit: - -- app.main() - app.main({duration = 3.5}) + timer.activate(timer.new("report", fn, 10e9, 'repeating')) + + app.main() end function ptr (x) return ffi.cast("void*",x) end diff --git a/src/core/buffer.lua b/src/core/buffer.lua index b826c7b04f..58c71c1b9e 100644 --- a/src/core/buffer.lua +++ b/src/core/buffer.lua @@ -37,9 +37,10 @@ end -- Free a buffer that is no longer in use. function free (b) - freelist.add(buffers, b) if b.origin.type == C.BUFFER_ORIGIN_VIRTIO then virtio_devices[b.origin.info.virtio.device_id]:return_virtio_buffer(b) + else + freelist.add(buffers, b) end end diff --git a/src/core/packet.h b/src/core/packet.h index 185fb4f356..ce4df1b872 100644 --- a/src/core/packet.h +++ b/src/core/packet.h @@ -3,6 +3,8 @@ // For example, buffers belonging to Virtio devices need to be // returned to the device when freed. 
+struct buffer; + struct buffer_origin { enum buffer_origin_type { BUFFER_ORIGIN_UNKNOWN = 0, @@ -13,7 +15,9 @@ struct buffer_origin { struct buffer_origin_info_virtio { int16_t device_id; int16_t ring_id; - int16_t descriptor_id; + int16_t header_id; + char *header_pointer; // virtual address in this process + uint32_t total_size; // how many bytes in all buffers } virtio; } info; }; diff --git a/src/core/packet.lua b/src/core/packet.lua index 09346d341b..9a7aff0c7f 100644 --- a/src/core/packet.lua +++ b/src/core/packet.lua @@ -15,6 +15,7 @@ initial_fuel = 1000 max_packets = 1e6 packets_fl = freelist.new("struct packet *", max_packets) packets = ffi.new("struct packet[?]", max_packets) +local packet_size = ffi.sizeof("struct packet") function module_init () for i = 0, max_packets-1 do @@ -156,7 +157,7 @@ function free (p) for i = 0, p.niovecs-1 do buffer.free(p.iovecs[i].buffer) end - ffi.fill(p, ffi.sizeof("struct packet"), 0) + ffi.fill(p, packet_size, 0) p.refcount = 1 p.fuel = initial_fuel freelist.add(packets_fl, p) diff --git a/src/designs/nfv/nfv.lua b/src/designs/nfv/nfv.lua new file mode 100644 index 0000000000..1c3ef1de6f --- /dev/null +++ b/src/designs/nfv/nfv.lua @@ -0,0 +1,51 @@ +module(...,package.seeall) + +local app = require("core.app") +local basic_apps = require("apps.basic.basic_apps") +local config = require("core.config") +local intel_app = require("apps.intel.intel_app") +local lib = require("core.lib") +local pcap = require("apps.pcap.pcap") +local timer = require("core.timer") +local pci = require("lib.hardware.pci") +local vfio = require("lib.hardware.vfio") +local vhost_user = require("apps.vhost.vhost_user") + +function main () + print("Starting NFV...") + local pciaddr = os.getenv("NFV_PCI") or error("No $NFV_PCI set.") + local socket = os.getenv("NFV_SOCKET") or error("No $NFV_SOCKET set.") + local trace = os.getenv("NFV_TRACE") + pci.unbind_device_from_linux(pciaddr) + local c = config.new() + config.app(c, "vm", 
vhost_user.VhostUser, socket) + config.app(c, "nic", intel_app.Intel82599, ([[{pciaddr='%s'}]]):format(pciaddr)) + if not trace then + print("No trace file ($NFV_TRACE) configured.") + config.link(c, "vm.tx -> nic.rx") + config.link(c, "nic.tx -> vm.rx") + else + print("Tracing to files " .. trace .. ".{vm,nic}") + config.app(c, "vm_tee", basic_apps.Tee) + config.app(c, "nic_tee", basic_apps.Tee) + config.app(c, "vm_trace", pcap.PcapWriter, trace..".vm") + config.app(c, "nic_trace", pcap.PcapWriter, trace..".nic") + config.link(c, "vm.tx -> vm_tee.input") + config.link(c, "vm_tee.tx -> nic.rx") + config.link(c, "vm_tee.tap -> vm_trace.input") + config.link(c, "nic.tx -> nic_tee.input") + config.link(c, "nic_tee.tx -> vm.rx") + config.link(c, "nic_tee.tap -> nic_trace.input") + end + app.configure(c) + -- Setup zero-copy + local nic, vm = app.app_table.nic, app.app_table.vm + nic:set_rx_buffer_freelist(vm:rx_buffers()) + timer.init() + timer.activate(timer.new("report", app.report, 1e9, 'repeating')) + print("Entering app.main()") + app.main() +end + +main() + diff --git a/src/lib/virtio/net_device.lua b/src/lib/virtio/net_device.lua new file mode 100644 index 0000000000..3d22ce02ad --- /dev/null +++ b/src/lib/virtio/net_device.lua @@ -0,0 +1,349 @@ +-- Implements virtio-net device + + +module(...,package.seeall) + +local buffer = require("core.buffer") +local freelist = require("core.freelist") +local lib = require("core.lib") +local link = require("core.link") +local memory = require("core.memory") +local packet = require("core.packet") +local timer = require("core.timer") +local ffi = require("ffi") +local C = ffi.C + +require("lib.virtio.virtio.h") +require("lib.virtio.virtio_vring_h") + +local char_ptr_t = ffi.typeof("char *") +local virtio_net_hdr_size = ffi.sizeof("struct virtio_net_hdr") +local packet_info_size = ffi.sizeof("struct packet_info") + +local invalid_header_id = 0xffff + +--[[ + A list of what needs to be implemented in order to fully support + 
some of the options. + + - VIRTIO_NET_F_CSUM - enables the SG I/O (resulting in + multiple chained data buffers in our TX path(self.rxring)) + Required by GSO/TSO/USO. Requires CSUM offload support in the + HW driver (now intel10g) + + - VIRTIO_NET_F_MRG_RXBUF - enables multiple chained buffers in our RX path + (self.txring). Also chnages the virtio_net_hdr to virtio_net_hdr_mrg_rxbuf + + - VIRTIO_F_ANY_LAYOUT - the virtio_net_hdr/virtio_net_hdr_mrg_rxbuf is "prepended" + in the first data buffer instead if provided by a separate descriptor. + Supported in fairly recent (3.13) Linux kernels + + - VIRTIO_RING_F_INDIRECT_DESC - support indirect buffer descriptors. + + - VIRTIO_NET_F_CTRL_VQ - creates a separate control virt queue + + - VIRTIO_NET_F_MQ - multiple RX/TX queues, usefull for SMP (host/guest). + Requires VIRTIO_NET_F_MQ + +--]] +local supported_features = 0 + +VirtioNetDevice = {} + +function VirtioNetDevice:new(owner) + assert(owner) + local o = { + owner = owner, + callfd = {}, + kickfd = {}, + tx_vring_num = 0, + rx_vring_num = 0, + -- buffer records that are not currently in use + buffer_recs = freelist.new("struct buffer *", 32*1024), + -- buffer records populated with available VM memory + vring_transmit_buffers = freelist.new("struct buffer *", 32*1024), + } + return setmetatable(o, {__index = VirtioNetDevice}) +end + +function VirtioNetDevice:poll_vring_packets () + -- RX + self:receive_packets_from_vm() + self:rx_signal_used() + -- TX + self:get_transmit_buffers_from_vm() + self:transmit_packets_to_vm() +end + +-- Receive all available packets from the virtual machine. 
+function VirtioNetDevice:receive_packets_from_vm () + + while self.rxavail ~= self.rxring.avail.idx do + local p = packet.allocate() + -- Header + local header_id = self.rxring.avail.ring[self.rxavail % self.rx_vring_num] + local header_desc = self.rxring.desc[header_id] + local header_pointer = ffi.cast(char_ptr_t,self:map_from_guest(header_desc.addr)) + --assert(header_desc.len == virtio_net_hdr_size) + local total_size = virtio_net_hdr_size + local data_desc = header_desc + --assert(bit.band(header_desc.flags, C.VIRTIO_DESC_F_NEXT) ~= 0) + + -- Fill in packet header + ffi.copy(p.info, header_pointer, packet_info_size) + + -- Data buffer + repeat + data_desc = self.rxring.desc[data_desc.next] + local b = freelist.remove(self.buffer_recs) or lib.malloc("struct buffer") + + local addr = self:map_from_guest(data_desc.addr) + b.pointer = ffi.cast(char_ptr_t, addr) + b.physical = self:translate_physical_addr(addr) + b.size = data_desc.len + + -- The total size will be added to the first buffer virtio info + total_size = total_size + b.size + + -- Fill buffer origin info + b.origin.type = C.BUFFER_ORIGIN_VIRTIO + -- Set invalid header_id for all buffers. The first will contain + -- the real header_id, set after the loop + b.origin.info.virtio.header_id = invalid_header_id + + packet.add_iovec(p, b, b.size) + until bit.band(data_desc.flags, C.VIRTIO_DESC_F_NEXT) == 0 + + -- Fill in the first buffer with header info + local v = p.iovecs[0].buffer.origin.info.virtio + v.device_id = self.virtio_device_id + v.ring_id = 1 -- rx ring + v.header_id = header_id + v.header_pointer = header_pointer + v.total_size = total_size + + self.rxavail = (self.rxavail + 1) % 65536 + + local l = self.owner.output.tx + if l then + link.transmit(l, p) + else + debug("droprx", "len", p.length, "niovecs", p.niovecs) + packet.deref(p) + end + end +end + +-- Populate the `self.vring_transmit_buffers` freelist with buffers from the VM. 
+function VirtioNetDevice:get_transmit_buffers_from_vm () + while self.txavail ~= self.txring.avail.idx do + -- Header + local header_id = self.txring.avail.ring[self.txavail % self.tx_vring_num] + local header_desc = self.txring.desc[header_id] + local header_pointer = ffi.cast(char_ptr_t,self:map_from_guest(header_desc.addr)) + --assert(header_desc.len == virtio_net_hdr_size) + local total_size = virtio_net_hdr_size + local data_desc = header_desc + --assert(bit.band(header_desc.flags, C.VIRTIO_DESC_F_NEXT) ~= 0) + + -- Data buffer + data_desc = self.txring.desc[data_desc.next] + local b = freelist.remove(self.buffer_recs) or lib.malloc("struct buffer") + + local addr = self:map_from_guest(data_desc.addr) + b.pointer = ffi.cast(char_ptr_t, addr) + b.physical = self:translate_physical_addr(addr) + b.size = data_desc.len + + -- The total size will be added to the first buffer virtio info + total_size = total_size + b.size + + -- Fill buffer origin info + b.origin.type = C.BUFFER_ORIGIN_VIRTIO + local v = b.origin.info.virtio + v.device_id = self.virtio_device_id + v.ring_id = 0 -- tx ring + v.header_id = header_id + v.header_pointer = header_pointer + v.total_size = total_size + + freelist.add(self.vring_transmit_buffers, b) + + self.txavail = (self.txavail + 1) % 65536 + end +end + +-- Prepared argument for writing a 1 to an eventfd. +local eventfd_one = ffi.new("uint64_t[1]", {1}) + +-- Transmit packets from the app input queue to the VM. 
+function VirtioNetDevice:transmit_packets_to_vm () + local l = self.owner.input.rx + if not l then return end + while not link.empty(l) do + local p = link.receive(l) + local iovec = p.iovecs[0] + local b = iovec.buffer + local virtio_hdr = b.origin.info.virtio.header_pointer + + --assert(b.origin.type == C.BUFFER_ORIGIN_VIRTIO) + + ffi.copy(virtio_hdr, p.info, packet_info_size) + + local used = self.txring.used.ring[self.txused%self.tx_vring_num] + local v = b.origin.info.virtio + --assert(v.header_id ~= invalid_header_id) + used.id = v.header_id + used.len = virtio_net_hdr_size + iovec.length + self.txused = (self.txused + 1) % 65536 + + packet.deref(p) + end + + if self.txring.used.idx ~= self.txused then + self.txring.used.idx = self.txused + C.write(self.callfd[0], eventfd_one, 8) + end +end + +-- Return a buffer to the virtual machine. +function VirtioNetDevice:return_virtio_buffer (b) + freelist.add(self.buffer_recs, b) + if b.origin.info.virtio.ring_id == 1 then -- Receive buffer? + + -- Only do this for the first buffer in the chain. + -- Distiguish it by the valid header_id + -- Other buffers in the chain are safe as long as + -- rx_signal_used() is not called. So be sure to free + -- all of them at one poll. + if b.origin.info.virtio.header_id ~= invalid_header_id then + local used = self.rxring.used.ring[self.rxused % self.rx_vring_num] + used.id = b.origin.info.virtio.header_id + used.len = b.origin.info.virtio.total_size + + self.rxused = (self.rxused + 1) % 65536 + end + end +end + +-- Advance the rx used ring and sugnal up +function VirtioNetDevice:rx_signal_used() + if self.rxring.used.idx ~= self.rxused then + self.rxring.used.idx = self.rxused + C.write(self.callfd[1], eventfd_one, 8) + end +end + +function VirtioNetDevice:translate_physical_addr (addr) + -- Assuming no-IOMMU + return memory.virtual_to_physical(addr) +end + +-- Address space remapping. 
+function VirtioNetDevice:map_to_guest (addr) + for _,m in ipairs(self.mem_table) do + if addr >= m.snabb and addr < m.snabb + m.size then + return addr + m.guest - m.snabb + end + end + error("mapping to guest address failed") +end + +function VirtioNetDevice:map_from_guest (addr) + for _,m in ipairs(self.mem_table) do + if addr >= m.guest and addr < m.guest + m.size then + return addr + m.snabb - m.guest + end + end + error("mapping to host address failed" .. tostring(ffi.cast("void*",addr))) +end + +function VirtioNetDevice:map_from_qemu (addr) + for _,m in ipairs(self.mem_table) do + if addr >= m.qemu and addr < m.qemu + m.size then + return addr + m.snabb - m.qemu + end + end + error("mapping to host address failed" .. tostring(ffi.cast("void*",addr))) +end + +function VirtioNetDevice:get_features() + return supported_features +end + +function VirtioNetDevice:set_features(features) + print(string.format("Set features 0x%x", tonumber(features))) +end + +function VirtioNetDevice:set_vring_num(idx, num) + + local n = tonumber(num) + if idx == 0 then + self.tx_vring_num = n + else + self.rx_vring_num = n + end +end + +function VirtioNetDevice:set_vring_call(idx, fd) + self.callfd[idx] = fd +end + +function VirtioNetDevice:set_vring_kick(idx, fd) + self.kickfd[idx] = fd +end + +function VirtioNetDevice:set_vring_addr(idx, ring) + if idx == 0 then + self.txring = ring + self.txused = ring.used.idx + else + self.rxring = ring + self.rxused = ring.used.idx + end +end + +function VirtioNetDevice:ready() + return self.txring and self.rxring +end + +function VirtioNetDevice:set_vring_base(idx, num) + if idx == 0 then + self.txavail = num + else + self.rxavail = num + end +end + +function VirtioNetDevice:get_vring_base(idx) + local n = 0 + if idx == 0 then + n = self.txavail + else + n = self.rxavail + end + return n +end + +function VirtioNetDevice:set_mem_table(mem_table) + self.mem_table = mem_table +end + +function VirtioNetDevice:report() + debug("txavail", 
self.txring.avail.idx, + "txused", self.txring.used.idx, + "rxavail", self.rxring.avail.idx, + "rxused", self.rxring.used.idx) +end + +function VirtioNetDevice:rx_buffers() + return self.vring_transmit_buffers +end + +function VirtioNetDevice:set_virtio_device_id(virtio_device_id) + self.virtio_device_id = virtio_device_id +end + +function debug (...) + print(...) +end diff --git a/src/lib/virtio/virtio.h b/src/lib/virtio/virtio.h index e692926a85..0ac32395c5 100644 --- a/src/lib/virtio/virtio.h +++ b/src/lib/virtio/virtio.h @@ -58,7 +58,22 @@ enum { VIRTIO_NET_F_MQ = 1 << 22, // Device supports Receive Flow Steering VIRTIO_NET_F_CTRL_MAC_ADDR = 1 << 23 // Set MAC address }; - + +enum { + VIRTIO_F_NOTIFY_ON_EMPTY = 1 << 24, /* We notify when the ring is completely used, + even if the guest is suppressing callbacks */ + VIRTIO_F_ANY_LAYOUT = 1 << 27, // Can the device handle any descriptor layout? + VIRTIO_RING_F_INDIRECT_DESC = 1 << 28, // We support indirect buffer descriptors + VIRTIO_RING_F_EVENT_IDX = 1 << 29, /* The Guest publishes the used index for which + it expects an interrupt at the end of the avail + ring. Host should ignore the avail->flags field. + The Host publishes the avail index for which + it expects a kick at the end of the used ring. + Guest should ignore the used->flags field. */ + VIRTIO_F_BAD_FEATURE = 1 << 30 /* A guest should never accept this. It implies + negotiation is broken. */ +}; + struct virtio_net_hdr { uint8_t flags; // See flags enum above uint8_t gso_type; // See GSO type above