@@ -14,6 +14,7 @@ TAGS = ctags
OBJS += 8250-serial.o
OBJS += virtio-blk.o
+OBJS += virtio-net.o
OBJS += virtio-console.o
OBJS += cpuid.o
OBJS += read-write.o
@@ -10,6 +10,8 @@
#define IOPORT_VIRTIO_BLK_SIZE 256
#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */
#define IOPORT_VIRTIO_CONSOLE_SIZE 256
+#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */
+#define IOPORT_VIRTIO_NET_SIZE 256
struct kvm;
new file mode 100644
@@ -0,0 +1,7 @@
+#ifndef KVM_TYPES_H
+#define KVM_TYPES_H
+
+/*
+ * FIXME: workaround for include/linux/if_tun.h and include/linux/if_ether.h,
+ * which complain when included by this tool (presumably because __be16 is
+ * not otherwise defined here -- confirm). Aliases __be16 to u16.
+ */
+#define __be16 u16
+
+#endif /* KVM_TYPES_H */
new file mode 100644
@@ -0,0 +1,7 @@
+#ifndef KVM__VIRTIO_NET_H
+#define KVM__VIRTIO_NET_H
+
+struct kvm;
+/*
+ * Set up the virtio network device for the given VM: registers the PCI
+ * device and its I/O port range, opens a host tap interface and starts the
+ * RX and TX worker threads.
+ */
+void virtio_net__init(struct kvm *self);
+
+#endif /* KVM__VIRTIO_NET_H */
@@ -13,6 +13,7 @@
#include <kvm/kvm-cpu.h>
#include <kvm/8250-serial.h>
#include <kvm/virtio-blk.h>
+#include <kvm/virtio-net.h>
#include <kvm/virtio-console.h>
#include <kvm/disk-image.h>
#include <kvm/util.h>
@@ -215,6 +216,8 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
virtio_console__init(kvm);
+ virtio_net__init(kvm);
+
kvm__start_timer(kvm);
for (i = 0; i < nrcpus; i++) {
new file mode 100644
@@ -0,0 +1,321 @@
+#include "kvm/virtio-net.h"
+#include "kvm/virtio-pci.h"
+#include "kvm/virtio.h"
+#include "kvm/ioport.h"
+#include "kvm/types.h"
+#include "kvm/mutex.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/pci.h"
+
+#include <linux/virtio_net.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <fcntl.h>
+
+/* IRQ line raised/lowered via kvm__irq_line() and advertised in the PCI header. */
+#define VIRTIO_NET_IRQ 14
+/* Ring size passed to vring_init() and reported on VIRTIO_PCI_QUEUE_NUM reads. */
+#define VIRTIO_NET_QUEUE_SIZE 128
+/* Number of virtqueues held in net_device.vqs[]. */
+#define VIRTIO_NET_NUM_QUEUES 2
+/* Indices into net_device.vqs[] for the receive and transmit queues. */
+#define VIRTIO_NET_RX_QUEUE 0
+#define VIRTIO_NET_TX_QUEUE 1
+/* Device number passed to pci__register(). */
+#define PCI_VIRTIO_NET_DEVNUM 3
+
+/* State of the single emulated virtio network device. */
+struct net_device {
+ /* Held by the port I/O handlers while they access the fields below. */
+ pthread_mutex_t mutex;
+
+ /* Virtqueues, indexed by VIRTIO_NET_RX_QUEUE / VIRTIO_NET_TX_QUEUE. */
+ struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
+ /* Device-specific config space, read byte-wise by the guest. */
+ struct virtio_net_config net_config;
+ /* Feature bits returned on VIRTIO_PCI_HOST_FEATURES reads. */
+ uint32_t host_features;
+ /* Feature bits stored from VIRTIO_PCI_GUEST_FEATURES writes. */
+ uint32_t guest_features;
+ /* Value returned on VIRTIO_MSI_CONFIG_VECTOR reads. */
+ uint16_t config_vector;
+ /* Last value written to VIRTIO_PCI_STATUS. */
+ uint8_t status;
+ /* Queue index last written to VIRTIO_PCI_QUEUE_SEL (guest-controlled). */
+ uint16_t queue_selector;
+
+ /* RX worker thread and the mutex/condvar used to wake it on a queue notify. */
+ pthread_t io_rx_thread;
+ pthread_mutex_t io_rx_mutex;
+ pthread_cond_t io_rx_cond;
+
+ /* TX worker thread and the mutex/condvar used to wake it on a queue notify. */
+ pthread_t io_tx_thread;
+ pthread_mutex_t io_tx_mutex;
+ pthread_cond_t io_tx_cond;
+
+ /* File descriptor of the opened /dev/net/tun device. */
+ int tap_fd;
+ /* Interface name copied back from the TUNSETIFF request. */
+ char tap_name[IFNAMSIZ];
+};
+
+/*
+ * The one device instance. Fields not named here are zero-initialized
+ * because the object has static storage duration.
+ */
+static struct net_device net_device = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+
+ /* Hard-coded MAC address; the link is always reported as up. */
+ .net_config = {
+ .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+ .status = VIRTIO_NET_S_LINK_UP,
+ },
+
+ /* The only feature offered to the guest is VIRTIO_NET_F_MAC. */
+ .host_features = 1UL << VIRTIO_NET_F_MAC,
+};
+
+/*
+ * RX worker thread: moves packets from the host tap device into guest
+ * receive buffers.
+ *
+ * @p: the struct kvm the device belongs to (as passed to pthread_create()).
+ *
+ * Sleeps on io_rx_cond until the guest has made buffers available on the RX
+ * queue. For every available buffer chain it fills in the virtio_net_hdr
+ * that occupies the first descriptor, reads one packet from the tap fd into
+ * the remaining descriptors, marks the chain as used and raises the device
+ * interrupt. The loop never terminates.
+ */
+static void *virtio_net_rx_thread(void *p)
+{
+	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+	struct virtio_net_hdr *hdr;
+	struct virt_queue *vq;
+	struct kvm *self;
+	uint16_t out, in;
+	uint16_t head;
+	int len;
+
+	self = p;
+	vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE];
+
+	while (1) {
+		mutex_lock(&net_device.io_rx_mutex);
+		/* Re-check in a loop: condition variables may wake up spuriously. */
+		while (!virt_queue__available(vq))
+			pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex);
+		mutex_unlock(&net_device.io_rx_mutex);
+
+		while (virt_queue__available(vq)) {
+			head = virt_queue__get_iov(vq, iov, &out, &in, self);
+
+			/*
+			 * The header lives in the guest buffer that iov[0] points
+			 * to, not in the iovec entry itself. Only write it when the
+			 * buffer is present and large enough.
+			 */
+			if (in > 0 && iov[0].iov_len >= sizeof(*hdr)) {
+				hdr = iov[0].iov_base;
+				hdr->flags = 0;
+				hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+			}
+
+			/* Packet payload goes into the descriptors after the header. */
+			len = readv(net_device.tap_fd, iov + 1, in - 1);
+			if (len < 0)
+				len = 0; /* never fold an error code into the used length */
+
+			virt_queue__set_used_elem(vq, head, sizeof(*hdr) + len);
+
+			/* we should interrupt guest right now, otherwise latency is huge. */
+			kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+		}
+	}
+
+	/* Not reached. */
+	return NULL;
+}
+
+/*
+ * TX worker thread: sends packets queued by the guest out through the host
+ * tap device.
+ *
+ * @p: the struct kvm the device belongs to (as passed to pthread_create()).
+ *
+ * Sleeps on io_tx_cond until the guest has queued buffers on the TX queue.
+ * For every available chain it skips the first descriptor (the
+ * virtio_net_hdr), writes the remaining descriptors to the tap fd and marks
+ * the chain as used. The device interrupt is raised once per drained batch.
+ * The loop never terminates.
+ */
+static void *virtio_net_tx_thread(void *p)
+{
+	struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+	struct virt_queue *vq;
+	struct kvm *self;
+	uint16_t out, in;
+	uint16_t head;
+	int len;
+
+	self = p;
+	vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE];
+
+	while (1) {
+		mutex_lock(&net_device.io_tx_mutex);
+		/* Re-check in a loop: condition variables may wake up spuriously. */
+		while (!virt_queue__available(vq))
+			pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex);
+		mutex_unlock(&net_device.io_tx_mutex);
+
+		while (virt_queue__available(vq)) {
+			head = virt_queue__get_iov(vq, iov, &out, &in, self);
+
+			len = writev(net_device.tap_fd, iov + 1, out - 1);
+			if (len < 0)
+				len = 0; /* do not report an error code as a length */
+
+			virt_queue__set_used_elem(vq, head, len);
+		}
+
+		kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+	}
+
+	/* Not reached. */
+	return NULL;
+}
+/*
+ * Handle a guest read from the device-specific configuration area.
+ *
+ * @data:   buffer that receives the byte read.
+ * @offset: offset of the access from the start of the device's port range.
+ * @size:   access width in bytes; only single-byte reads are supported.
+ * @count:  repeat count; only 1 is supported.
+ *
+ * Returns true when one byte of net_config was copied to @data, false when
+ * the access is not a single byte read or falls outside net_config.
+ */
+static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count)
+{
+	uint8_t *config_space = (uint8_t *) &net_device.net_config;
+	unsigned long config_offset;
+
+	if (size != 1 || count != 1)
+		return false;
+
+	/*
+	 * An offset below the config area wraps to a huge unsigned value here
+	 * and is rejected by the same range check.
+	 */
+	config_offset = offset - VIRTIO_PCI_CONFIG_NOMSI;
+
+	if (config_offset >= sizeof(struct virtio_net_config)) {
+		error("config offset is too big: %lu", config_offset);
+		return false;
+	}
+
+	ioport__write8(data, config_space[config_offset]);
+
+	return true;
+}
+
+/*
+ * Port I/O read handler for the virtio-net register block.
+ *
+ * @self:  VM the access came from.
+ * @port:  absolute I/O port; the register offset is port - IOPORT_VIRTIO_NET.
+ * @data:  buffer that receives the value read.
+ * @size:  access width in bytes (forwarded to the config-space handler).
+ * @count: repeat count (forwarded to the config-space handler).
+ *
+ * Returns true when the read was serviced, false for registers that cannot
+ * be read here (guest features, queue select, queue notify) or for a
+ * rejected config-space access. Runs with net_device.mutex held.
+ *
+ * NOTE(review): in <linux/virtio_pci.h> VIRTIO_MSI_CONFIG_VECTOR appears to
+ * share its offset with VIRTIO_PCI_CONFIG_NOMSI, in which case the first
+ * config byte never reaches the device-specific handler -- confirm.
+ */
+static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+	unsigned long offset = port - IOPORT_VIRTIO_NET;
+	bool ret = true;
+
+	mutex_lock(&net_device.mutex);
+
+	switch (offset) {
+	case VIRTIO_PCI_HOST_FEATURES:
+		ioport__write32(data, net_device.host_features);
+		break;
+	case VIRTIO_PCI_GUEST_FEATURES:
+		ret = false;
+		break;
+	case VIRTIO_PCI_QUEUE_PFN:
+		/*
+		 * queue_selector is written by the guest without validation, so
+		 * never index past vqs[]; a non-existent queue reads back as 0.
+		 */
+		if (net_device.queue_selector < VIRTIO_NET_NUM_QUEUES)
+			ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn);
+		else
+			ioport__write32(data, 0);
+		break;
+	case VIRTIO_PCI_QUEUE_NUM:
+		ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
+		break;
+	case VIRTIO_PCI_QUEUE_SEL:
+	case VIRTIO_PCI_QUEUE_NOTIFY:
+		ret = false;
+		break;
+	case VIRTIO_PCI_STATUS:
+		ioport__write8(data, net_device.status);
+		break;
+	case VIRTIO_PCI_ISR:
+		/* Reading the ISR reports the interrupt and lowers the IRQ line. */
+		ioport__write8(data, 0x1);
+		kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
+		break;
+	case VIRTIO_MSI_CONFIG_VECTOR:
+		ioport__write16(data, net_device.config_vector);
+		break;
+	default:
+		/* Everything else is treated as a device config-space read. */
+		ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
+		break;
+	}
+
+	mutex_unlock(&net_device.mutex);
+
+	return ret;
+}
+
+/* Signal @cond while holding @lock, waking the I/O thread waiting on it. */
+static void virtio_net_kick_thread(pthread_mutex_t *lock, pthread_cond_t *cond)
+{
+	mutex_lock(lock);
+	pthread_cond_signal(cond);
+	mutex_unlock(lock);
+}
+
+/*
+ * React to a guest queue notification by waking the worker thread that
+ * services @queue_index. Notifications for any other queue are ignored.
+ */
+static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index)
+{
+	switch (queue_index) {
+	case VIRTIO_NET_TX_QUEUE:
+		virtio_net_kick_thread(&net_device.io_tx_mutex, &net_device.io_tx_cond);
+		break;
+	case VIRTIO_NET_RX_QUEUE:
+		virtio_net_kick_thread(&net_device.io_rx_mutex, &net_device.io_rx_cond);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Port I/O write handler for the virtio-net register block.
+ *
+ * @self:  VM the access came from.
+ * @port:  absolute I/O port; the register offset is port - IOPORT_VIRTIO_NET.
+ * @data:  buffer holding the value written by the guest.
+ * @size:  access width in bytes (unused).
+ * @count: repeat count (unused).
+ *
+ * Returns true when the write was accepted, false for unknown registers or
+ * for a queue address written while an invalid queue is selected. Runs with
+ * net_device.mutex held.
+ */
+static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+	unsigned long offset = port - IOPORT_VIRTIO_NET;
+	bool ret = true;
+
+	mutex_lock(&net_device.mutex);
+
+	switch (offset) {
+	case VIRTIO_PCI_GUEST_FEATURES:
+		net_device.guest_features = ioport__read32(data);
+		break;
+	case VIRTIO_PCI_QUEUE_PFN: {
+		struct virt_queue *queue;
+		void *p;
+
+		/*
+		 * The selector is guest-controlled: reject the write instead of
+		 * relying on assert(), which aborts the whole process and is
+		 * compiled out under NDEBUG.
+		 */
+		if (net_device.queue_selector >= VIRTIO_NET_NUM_QUEUES) {
+			ret = false;
+			break;
+		}
+
+		queue = &net_device.vqs[net_device.queue_selector];
+		queue->pfn = ioport__read32(data);
+		/* Widen before shifting so the page address cannot be truncated to 32 bits. */
+		p = guest_flat_to_host(self, (uint64_t)queue->pfn << 12);
+
+		vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096);
+
+		break;
+	}
+	case VIRTIO_PCI_QUEUE_SEL:
+		net_device.queue_selector = ioport__read16(data);
+		break;
+	case VIRTIO_PCI_QUEUE_NOTIFY: {
+		uint16_t queue_index;
+
+		queue_index = ioport__read16(data);
+		virtio_net_handle_callback(self, queue_index);
+		break;
+	}
+	case VIRTIO_PCI_STATUS:
+		net_device.status = ioport__read8(data);
+		break;
+	case VIRTIO_MSI_CONFIG_VECTOR:
+		/* Whatever the guest writes, the vector is reset to "no vector". */
+		net_device.config_vector = VIRTIO_MSI_NO_VECTOR;
+		break;
+	case VIRTIO_MSI_QUEUE_VECTOR:
+		/* Accepted and ignored. */
+		break;
+	default:
+		ret = false;
+		break;
+	}
+
+	mutex_unlock(&net_device.mutex);
+	return ret;
+}
+
+/* Port I/O handlers registered for the virtio-net port range by virtio_net__init(). */
+static struct ioport_operations virtio_net_io_ops = {
+ .io_in = virtio_net_pci_io_in,
+ .io_out = virtio_net_pci_io_out,
+};
+
+/* PCI vendor/device and subsystem IDs under which the device is exposed. */
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_DEVICE_ID_VIRTIO_NET 0x1000
+#define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001
+
+/* PCI configuration header handed to pci__register() for this device. */
+static struct pci_device_header virtio_net_pci_device = {
+ .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NET,
+ .header_type = PCI_HEADER_TYPE_NORMAL,
+ .revision_id = 0,
+ /* NOTE(review): presumably the PCI class code for an Ethernet controller -- confirm. */
+ .class = 0x020000,
+ .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
+ .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET,
+ /* BAR 0 is an I/O-space BAR covering the virtio-net port range. */
+ .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
+ .irq_pin = 3,
+ /* Same IRQ number the device raises through kvm__irq_line(). */
+ .irq_line = VIRTIO_NET_IRQ,
+};
+
+/*
+ * Open the host tap device that backs the virtio network device.
+ *
+ * Opens /dev/net/tun, requests a tap interface without packet-info headers,
+ * records the interface name chosen by the kernel in net_device.tap_name and
+ * then brings that interface up with a fixed address via ifconfig.
+ *
+ * Calls die() when the tun device cannot be opened or configured; a failing
+ * ifconfig invocation only produces a warning on stderr.
+ */
+static void virtio_net__tap_init(void)
+{
+	struct ifreq ifr;
+	char cmd[64];
+
+	net_device.tap_fd = open("/dev/net/tun", O_RDWR);
+	if (net_device.tap_fd < 0)
+		die("Unable to open /dev/net/tun\n");
+
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+	/* Without an attached interface the fd is useless, so do not carry on. */
+	if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0)
+		die("Unable to configure tap device (TUNSETIFF failed)\n");
+
+	/* strncpy() does not terminate on truncation; do it explicitly. */
+	strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name) - 1);
+	net_device.tap_name[sizeof(net_device.tap_name) - 1] = '\0';
+
+	/* Best effort, as before: the result is intentionally not checked. */
+	ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
+
+	/*
+	 * FIXME: Remove this after user can specify ip address and netmask.
+	 * Use the name the kernel actually assigned rather than assuming tap0.
+	 */
+	snprintf(cmd, sizeof(cmd), "ifconfig %s up", net_device.tap_name);
+	if (system(cmd) != 0)
+		fprintf(stderr, "Warning: '%s' failed\n", cmd);
+
+	snprintf(cmd, sizeof(cmd), "ifconfig %s 192.168.33.2", net_device.tap_name);
+	if (system(cmd) != 0)
+		fprintf(stderr, "Warning: '%s' failed\n", cmd);
+}
+
+/*
+ * Initialize the RX and TX synchronization objects and start both worker
+ * threads.
+ *
+ * @self: VM handed to the worker threads as their argument.
+ *
+ * Each direction gets its own mutex/condition-variable pair. Calls die()
+ * when a worker thread cannot be created.
+ */
+static void virtio_net__io_thread_init(struct kvm *self)
+{
+	pthread_mutex_init(&net_device.io_rx_mutex, NULL);
+	pthread_cond_init(&net_device.io_rx_cond, NULL);
+
+	pthread_mutex_init(&net_device.io_tx_mutex, NULL);
+	pthread_cond_init(&net_device.io_tx_cond, NULL);
+
+	if (pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self) != 0)
+		die("Unable to create virtio-net RX thread\n");
+
+	if (pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self) != 0)
+		die("Unable to create virtio-net TX thread\n");
+}
+
+/*
+ * Bring up the virtio network device for @self: register the PCI device
+ * and its I/O port range, then open the host tap device and start the
+ * RX/TX worker threads.
+ */
+void virtio_net__init(struct kvm *self)
+{
+ pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM);
+ ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
+
+ virtio_net__tap_init();
+ virtio_net__io_thread_init(self);
+}
This patch implements a virtio network device. The current implementation uses tap, which needs root privileges to create a virtual network device (tap0) on the host side. Actually, what we need is CAP_NET_ADMIN. The host side tap0 is set to 192.168.33.2/24. You need to configure the guest side eth0 to any IP address in 192.168.33.0/24. Here are some scp performance test results for different implementations: None of rx and tx as thread: guest to host 3.2MB/s host to guest 3.1MB/s Only rx as thread: guest to host 14.7MB/s host to guest 33.4MB/s Both rx and tx as thread (this patch works this way): guest to host 19.8MB/s host to guest 32.5MB/s Signed-off-by: Asias He <asias.hejun@gmail.com> --- tools/kvm/Makefile | 1 + tools/kvm/include/kvm/ioport.h | 2 + tools/kvm/include/kvm/types.h | 7 + tools/kvm/include/kvm/virtio-net.h | 7 + tools/kvm/kvm-run.c | 3 + tools/kvm/virtio-net.c | 321 ++++++++++++++++++++++++++++++++++++ 6 files changed, 341 insertions(+), 0 deletions(-) create mode 100644 tools/kvm/include/kvm/types.h create mode 100644 tools/kvm/include/kvm/virtio-net.h create mode 100644 tools/kvm/virtio-net.c