Message ID | 1478746069-79574-3-git-send-email-wei.w.wang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi On Thu, Nov 10, 2016 at 6:47 AM Wei Wang <wei.w.wang@intel.com> wrote: > This is the slave part of vhost-user implemented in QEMU, with an extension > to support vhost-pci. > Instead of implementing "another vhost-user slave", it would be worth investigating using libvhost-user instead ( https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg03990.html). This is just a suggestion, it is quite fine for vhost-pci to have its own smaller/specific vhost-user slave implementation (without virtio rings handlings etc). (libvhost-user is also very young, not yet in qemu, we should be able shape it for vhost-pci needs) > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > --- > hw/virtio/Makefile.objs | 1 + > hw/virtio/vhost-pci-server.c | 469 > +++++++++++++++++++++++++++++++++++ > hw/virtio/vhost-user.c | 86 +------ > include/hw/virtio/vhost-pci-server.h | 45 ++++ > include/hw/virtio/vhost-user.h | 110 ++++++++ > include/sysemu/sysemu.h | 1 + > qemu-options.hx | 4 + > vl.c | 26 ++ > 8 files changed, 657 insertions(+), 85 deletions(-) > create mode 100644 hw/virtio/vhost-pci-server.c > create mode 100644 include/hw/virtio/vhost-pci-server.h > create mode 100644 include/hw/virtio/vhost-user.h > > diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs > index 3e2b175..e44feb8 100644 > --- a/hw/virtio/Makefile.objs > +++ b/hw/virtio/Makefile.objs > @@ -2,6 +2,7 @@ common-obj-y += virtio-rng.o > common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o > common-obj-y += virtio-bus.o > common-obj-y += virtio-mmio.o > +common-obj-y += vhost-pci-server.o > > obj-y += virtio.o virtio-balloon.o > obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o > diff --git a/hw/virtio/vhost-pci-server.c b/hw/virtio/vhost-pci-server.c > new file mode 100644 > index 0000000..6ce8516 > --- /dev/null > +++ b/hw/virtio/vhost-pci-server.c > @@ -0,0 +1,469 @@ > +/* > + * Vhost-pci server > + * > + * Copyright Intel Corp. 
2016 > + * > + * Authors: > + * Wei Wang <wei.w.wang@intel.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > later. > + * See the COPYING file in the top-level directory. > + */ > + > +#include <qemu/osdep.h> > +#include <qemu/thread.h> > +#include <qemu/main-loop.h> > +#include <qemu/bitops.h> > +#include <qemu/bitmap.h> > +#include <qemu/sockets.h> > +#include <linux/virtio_net.h> > +#include "sysemu/char.h" > +#include "qapi/error.h" > +#include "hw/virtio/vhost-pci-server.h" > +#include "qemu/option.h" > +#include "monitor/qdev.h" > +#include "hw/virtio/vhost-user.h" > +#include "hw/qdev.h" > + > +#define VHOST_PCI_FEATURE_BITS (1ULL << VIRTIO_F_VERSION_1) > + > +#define VHOST_PCI_NET_FEATURE_BITS (1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ > + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ > + (1ULL << VIRTIO_NET_F_MQ) > + > +#define VHOST_USER_SET_PEER_CONNECTION_OFF 0 > +#define VHOST_USER_SET_PEER_CONNECTION_ON 1 > +#define VHOST_USER_SET_PEER_CONNECTION_INIT 2 > + > +VhostPCIServer *vp_server; > + > +QemuOptsList qemu_vhost_pci_server_opts = { > + .name = "vhost-pci-server", > + .implied_opt_name = "chardev", > + .head = QTAILQ_HEAD_INITIALIZER(qemu_vhost_pci_server_opts.head), > + .desc = { > + /* > + * no elements => accept any > + * sanity checking will happen later > + * when setting device properties > + */ > + { /* end of list */ } > + }, > +}; > + > +static int vhost_pci_server_write(CharDriverState *chr, VhostUserMsg *msg) > +{ > + int size = msg->size + VHOST_USER_HDR_SIZE; > + > + if (!msg) > + return 0; > + > + msg->flags &= ~VHOST_USER_VERSION_MASK; > + msg->flags |= VHOST_USER_VERSION; > + > + return qemu_chr_fe_write_all_n(chr, msg->conn_id, > + (const uint8_t *)msg, size) == size ? 
0 > : -1; > +} > + > +PeerConnectionTable *vp_server_find_table_ent(const char *dev_id) > +{ > + int i; > + PeerConnectionTable *ent; > + uint64_t max_connections = vp_server->chr->max_connections; > + > + for (i = 0; i < max_connections; i++) { > + ent = &vp_server->peer_table[i]; > + if (!strcmp(dev_id, ent->dev_id)) > + return ent; > + } > + return NULL; > +} > + > +static void vhost_pci_init_peer_table(uint64_t id) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[id]; > + > + ent->peer_feature_bits |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; > + QLIST_INIT(&ent->vq_list); > + ent->vq_num = 0; > +} > + > +static int vhost_pci_get_conn_id(CharDriverState *chr, VhostUserMsg *msg) > +{ > + unsigned long *conn_bitmap = chr->conn_bitmap; > + unsigned long *old_conn_bitmap = vp_server->old_conn_bitmap; > + uint64_t nbits = chr->max_connections; > + uint64_t id; > + int r; > + > + bitmap_xor(old_conn_bitmap, old_conn_bitmap, conn_bitmap, > (long)nbits); > + > + for (id = find_first_bit(old_conn_bitmap, nbits); id < nbits; > + id = find_next_bit(old_conn_bitmap, nbits, id + 1)) { > + vhost_pci_init_peer_table(id); > + msg->conn_id = id; > + msg->payload.u64 = id; > + msg->size = sizeof(msg->payload.u64); > + msg->flags |= VHOST_USER_REPLY_MASK; > + r = vhost_pci_server_write(chr, msg); > + } > + bitmap_copy(old_conn_bitmap, conn_bitmap, (long)nbits); > + > + return r; > +} > + > +static int vhost_pci_get_peer_features(CharDriverState *chr, VhostUserMsg > *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + msg->payload.u64 = ent->peer_feature_bits; > + msg->size = sizeof(msg->payload.u64); > + msg->flags |= VHOST_USER_REPLY_MASK; > + return vhost_pci_server_write(chr, msg); > +} > + > +static int vhost_pci_get_queue_num(CharDriverState *chr, VhostUserMsg > *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + switch (ent->virtio_id) { > + case VIRTIO_ID_NET: > + msg->payload.u64 = 
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; > + break; > + default: > + printf("%s: device type not supported yet..\n", __func__); > + } > + msg->size = sizeof(msg->payload.u64); > + msg->flags |= VHOST_USER_REPLY_MASK; > + return vhost_pci_server_write(chr, msg); > +} > + > +static int vhost_pci_get_protocol_features(CharDriverState *chr, > VhostUserMsg *msg) > +{ > + msg->payload.u64 = VHOST_USER_PROTOCOL_FEATURES; > + msg->size = sizeof(msg->payload.u64); > + msg->flags |= VHOST_USER_REPLY_MASK; > + return vhost_pci_server_write(chr, msg); > +} > + > +static void vhost_pci_set_protocol_features(VhostUserMsg *msg) > +{ > + vp_server->protocol_features = msg->payload.u64; > +} > + > +static int vhost_pci_device_create(uint64_t conn_id) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; > + Error *local_err = NULL; > + QemuOpts *opts; > + DeviceState *dev; > + char params[50]; > + > + switch (ent->virtio_id) { > + case VIRTIO_ID_NET: > + sprintf(params, "driver=vhost-pci-net-pci,id=vhost-pci-%ld", > conn_id); > + sprintf(ent->dev_id, "vhost-pci-%ld", conn_id); > + break; > + default: > + printf("%s: device type not supported yet..\n", __func__); > + } > + > + opts = qemu_opts_parse_noisily(qemu_find_opts("device"), params, > true); > + dev = qdev_device_add(opts, &local_err); > + if (!dev) { > + qemu_opts_del(opts); > + return -1; > + } > + object_unref(OBJECT(dev)); > + return 0; > +} > + > +static void vhost_pci_set_device_info(VhostUserMsg *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + DeviceInfo *info = &msg->payload.dev_info; > + > + memcpy(ent->uuid, info->uuid, sizeof(uuid_t)); > + ent->virtio_id = info->virtio_id; > + switch (ent->virtio_id) { > + case VIRTIO_ID_NET: > + ent->peer_feature_bits |= (VHOST_PCI_FEATURE_BITS | > VHOST_PCI_NET_FEATURE_BITS); > + break; > + default: > + printf("%s: device type not supported yet..\n", __func__); > + } > +} > + > +static void vhost_pci_set_peer_feature_bits(VhostUserMsg 
*msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + > + ent->peer_feature_bits = msg->payload.u64; > +} > + > +static uint64_t vhost_pci_peer_mem_size_get(VhostUserMemory *peer_mem) > +{ > + int i; > + uint64_t total_size; > + uint32_t nregions = peer_mem->nregions; > + VhostUserMemoryRegion *regions = peer_mem->regions; > + > + for (i = 0; i < nregions; i++) { > + total_size += regions[i].memory_size; > + } > + > + return total_size; > +} > + > +static int vhost_pci_set_mem_table(uint64_t conn_id, VhostUserMemory > *peer_mem, int *fds) > +{ > + int i; > + void *mr_qva; > + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; > + uint32_t nregions = peer_mem->nregions; > + VhostUserMemoryRegion *peer_mr = peer_mem->regions; > + MemoryRegion *bar_mr = g_malloc(sizeof(MemoryRegion)); > + MemoryRegion *mr = g_malloc(nregions * sizeof(MemoryRegion)); > + uint64_t bar_size = 2 * vhost_pci_peer_mem_size_get(peer_mem); > + uint64_t bar_map_offset = 0; > + > + bar_size = pow2ceil(bar_size); > + memory_region_init(bar_mr, NULL, "Peer Memory", bar_size); > + > + for (i = 0; i < nregions; i++) { > + mr_qva = mmap(NULL, peer_mr[i].memory_size + > peer_mr[i].mmap_offset, > + PROT_READ | PROT_READ, MAP_SHARED, fds[i], 0); > + if (mr_qva == MAP_FAILED) { > + printf("%s called: map failed \n", __func__); > + return -1; > + } > + mr_qva += peer_mr[i].mmap_offset; > + memory_region_init_ram_ptr(&mr[i], NULL, "Peer Memory", > peer_mr[i].memory_size, mr_qva); > + memory_region_add_subregion(bar_mr, bar_map_offset, &mr[i]); > + bar_map_offset += peer_mr[i].memory_size; > + } > + ent->bar_mr = bar_mr; > + ent->bar_map_offset = bar_map_offset; > + > + return 0; > +} > + > +static void vhost_pci_alloc_peer_vring_info(uint64_t conn_id) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; > + PeerVirtqInfo *virtq_info = g_malloc0(sizeof(PeerVirtqInfo)); > + QLIST_INSERT_HEAD(&ent->vq_list, virtq_info, node); > + ent->vq_num++; > +} > + 
> +static void vhost_pci_set_vring_num(VhostUserMsg *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); > + > + virtq_info->vring_num = msg->payload.u64; > +} > + > +static void vhost_pci_set_vring_base(VhostUserMsg *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); > + > + virtq_info->last_avail_idx = msg->payload.u64; > +} > + > +static void vhost_pci_set_vring_addr(VhostUserMsg *msg) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; > + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); > + memcpy(&virtq_info->addr, &msg->payload.addr, > + sizeof(struct vhost_vring_addr)); > +} > + > +static void vhost_pci_set_vring_kick(uint64_t conn_id, int fd) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; > + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); > + if (!virtq_info) > + virtq_info->kickfd = fd; > +} > + > +static void vhost_pci_set_vring_call(uint64_t conn_id, int fd) > +{ > + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; > + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); > + if (virtq_info) > + virtq_info->callfd = fd; > +} > + > +static void vhost_pci_set_peer_connection(VhostUserMsg *msg) > +{ > + uint64_t cmd = msg->payload.u64; > + uint64_t conn_id = msg->conn_id; > + > + switch (cmd) { > + case VHOST_USER_SET_PEER_CONNECTION_INIT: > + vhost_pci_device_create(conn_id); > + break; > + default: > + printf("%s called: cmd %lu not supported yet \n", __func__, cmd); > + } > +} > + > +static void vhost_pci_server_read(void *opaque, const uint8_t *buf, int > size) > +{ > + VhostUserMsg msg; > + uint8_t *p = (uint8_t *) &msg; > + CharDriverState *chr = (CharDriverState *)opaque; > + int fds[8], fd_num; > + > + if (size != VHOST_USER_HDR_SIZE) { > + printf("Wrong message size received %d\n", size); > + return; > + 
} > + memcpy(p, buf, VHOST_USER_HDR_SIZE); > + > + if (msg.size) { > + p += VHOST_USER_HDR_SIZE; > + size = qemu_chr_fe_read_all_n(chr, msg.conn_id, p, msg.size); > + if (size != msg.size) { > + printf("Wrong message size received %d != %d\n", > + size, msg.size); > + return; > + } > + } > + > + if (msg.request > VHOST_USER_MAX) > + printf("vhost read incorrect msg \n"); > + > + switch(msg.request) { > + case VHOST_USER_GET_CONN_ID: > + vhost_pci_get_conn_id(chr, &msg); > + break; > + case VHOST_USER_GET_FEATURES: > + vhost_pci_get_peer_features(chr, &msg); > + break; > + case VHOST_USER_GET_PROTOCOL_FEATURES: > + vhost_pci_get_protocol_features(chr, &msg); > + break; > + case VHOST_USER_SET_PROTOCOL_FEATURES: > + vhost_pci_set_protocol_features(&msg); > + break; > + case VHOST_USER_SET_DEV_INFO: > + vhost_pci_set_device_info(&msg); > + break; > + case VHOST_USER_GET_QUEUE_NUM: > + vhost_pci_get_queue_num(chr, &msg); > + break; > + case VHOST_USER_SET_OWNER: > + break; > + case VHOST_USER_SET_FEATURES: > + vhost_pci_set_peer_feature_bits(&msg); > + break; > + case VHOST_USER_SET_VRING_NUM: > + vhost_pci_alloc_peer_vring_info(msg.conn_id); > + vhost_pci_set_vring_num(&msg); > + break; > + case VHOST_USER_SET_VRING_BASE: > + vhost_pci_set_vring_base(&msg); > + break; > + case VHOST_USER_SET_VRING_ADDR: > + vhost_pci_set_vring_addr(&msg); > + break; > + case VHOST_USER_SET_VRING_KICK: > + /* consume the fd */ > + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1); > + printf("VHOST_USER_SET_VRING_KICK called:..kickfd = %d\n", > fds[0]); > + vhost_pci_set_vring_kick(msg.conn_id, fds[0]); > + /* > + * This is a non-blocking eventfd. > + * The receive function forces it to be blocking, > + * so revert it back to non-blocking. 
> + */ > + qemu_set_nonblock(fds[0]); > + break; > + case VHOST_USER_SET_VRING_CALL: > + /* consume the fd */ > + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1); > + vhost_pci_set_vring_call(msg.conn_id, fds[0]); > + /* > + * This is a non-blocking eventfd. > + * The receive function forces it to be blocking, > + * so revert it back to non-blocking. > + */ > + qemu_set_nonblock(fds[0]); > + break; > + case VHOST_USER_SET_MEM_TABLE: > + fd_num = qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, > + fds, sizeof(fds) / sizeof(int)); > + printf("VHOST_USER_SET_MEM_TABLE: fd = %d \n", fd_num); > + vhost_pci_set_mem_table(msg.conn_id, &msg.payload.memory, fds); > + break; > + case VHOST_USER_SET_PEER_CONNECTION: > + vhost_pci_set_peer_connection(&msg); > + break; > + default: > + printf("default called..msg->request = %d \n", msg.request); > + break; > + } > +} > + > +static int vhost_pci_server_can_read(void *opaque) > +{ > + return VHOST_USER_HDR_SIZE; > +} > + > +static void vhost_pci_server_event(void *opaque, int event) > +{ > + switch (event) { > + case CHR_EVENT_OPENED: > + printf("vhost_pci_server_event called.. 
\n"); > + break; > + case CHR_EVENT_CLOSED: > + printf("vhost_pci_server_event called: event close..\n"); > + break; > + } > +} > + > +static CharDriverState *vhost_pci_server_parse_chardev(const char *id) > +{ > + CharDriverState *chr = qemu_chr_find(id); > + if (chr == NULL) { > + printf("chardev \"%s\" not found", id); > + return NULL; > + } > + > + qemu_chr_fe_claim_no_fail(chr); > + > + return chr; > +} > + > +int vhost_pci_server_init(QemuOpts *opts) > +{ > + CharDriverState *chr; > + const char *chardev_id = qemu_opt_get(opts, "chardev"); > + uint64_t max_connections; > + > + vp_server = (VhostPCIServer *)malloc(sizeof(VhostPCIServer)); > + > + chr = vhost_pci_server_parse_chardev(chardev_id); > + if (!chr) { > + return -1; > + } > + max_connections = chr->max_connections; > + > + qemu_chr_add_handlers(chr, vhost_pci_server_can_read, > vhost_pci_server_read, vhost_pci_server_event, chr); > + > + vp_server->chr = chr; > + > + vp_server->peer_table = (PeerConnectionTable > *)g_malloc0(max_connections * sizeof(PeerConnectionTable)); > + > + vp_server->old_conn_bitmap = bitmap_new(max_connections); > + > + return 0; > +} > + > +int vhost_pci_server_cleanup(void) > +{ > + free(vp_server); > + printf("vhost_pci_server_cleanup called.. 
\n"); > + return 0; > +} > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c > index b57454a..bce5181 100644 > --- a/hw/virtio/vhost-user.c > +++ b/hw/virtio/vhost-user.c > @@ -13,6 +13,7 @@ > #include "hw/virtio/vhost.h" > #include "hw/virtio/vhost-backend.h" > #include "hw/virtio/virtio-net.h" > +#include "hw/virtio/vhost-user.h" > #include "sysemu/char.h" > #include "sysemu/kvm.h" > #include "qemu/error-report.h" > @@ -24,91 +25,6 @@ > #include <sys/un.h> > #include <linux/vhost.h> > > -#define VHOST_MEMORY_MAX_NREGIONS 8 > -#define VHOST_USER_F_PROTOCOL_FEATURES 30 > - > -enum VhostUserProtocolFeature { > - VHOST_USER_PROTOCOL_F_MQ = 0, > - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, > - VHOST_USER_PROTOCOL_F_RARP = 2, > - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, > - > - VHOST_USER_PROTOCOL_F_MAX > -}; > - > -#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << > VHOST_USER_PROTOCOL_F_MAX) - 1) > - > -typedef enum VhostUserRequest { > - VHOST_USER_NONE = 0, > - VHOST_USER_GET_FEATURES = 1, > - VHOST_USER_SET_FEATURES = 2, > - VHOST_USER_SET_OWNER = 3, > - VHOST_USER_RESET_OWNER = 4, > - VHOST_USER_SET_MEM_TABLE = 5, > - VHOST_USER_SET_LOG_BASE = 6, > - VHOST_USER_SET_LOG_FD = 7, > - VHOST_USER_SET_VRING_NUM = 8, > - VHOST_USER_SET_VRING_ADDR = 9, > - VHOST_USER_SET_VRING_BASE = 10, > - VHOST_USER_GET_VRING_BASE = 11, > - VHOST_USER_SET_VRING_KICK = 12, > - VHOST_USER_SET_VRING_CALL = 13, > - VHOST_USER_SET_VRING_ERR = 14, > - VHOST_USER_GET_PROTOCOL_FEATURES = 15, > - VHOST_USER_SET_PROTOCOL_FEATURES = 16, > - VHOST_USER_GET_QUEUE_NUM = 17, > - VHOST_USER_SET_VRING_ENABLE = 18, > - VHOST_USER_SEND_RARP = 19, > - VHOST_USER_MAX > -} VhostUserRequest; > - > -typedef struct VhostUserMemoryRegion { > - uint64_t guest_phys_addr; > - uint64_t memory_size; > - uint64_t userspace_addr; > - uint64_t mmap_offset; > -} VhostUserMemoryRegion; > - > -typedef struct VhostUserMemory { > - uint32_t nregions; > - uint32_t padding; > - VhostUserMemoryRegion 
regions[VHOST_MEMORY_MAX_NREGIONS]; > -} VhostUserMemory; > - > -typedef struct VhostUserLog { > - uint64_t mmap_size; > - uint64_t mmap_offset; > -} VhostUserLog; > - > -typedef struct VhostUserMsg { > - VhostUserRequest request; > - > -#define VHOST_USER_VERSION_MASK (0x3) > -#define VHOST_USER_REPLY_MASK (0x1<<2) > -#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) > - uint32_t flags; > - uint32_t size; /* the following payload size */ > - union { > -#define VHOST_USER_VRING_IDX_MASK (0xff) > -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) > - uint64_t u64; > - struct vhost_vring_state state; > - struct vhost_vring_addr addr; > - VhostUserMemory memory; > - VhostUserLog log; > - } payload; > -} QEMU_PACKED VhostUserMsg; > - > -static VhostUserMsg m __attribute__ ((unused)); > -#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ > - + sizeof(m.flags) \ > - + sizeof(m.size)) > - > -#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) > - > -/* The version of the protocol we support */ > -#define VHOST_USER_VERSION (0x1) > - > static bool ioeventfd_enabled(void) > { > return kvm_enabled() && kvm_eventfds_enabled(); > diff --git a/include/hw/virtio/vhost-pci-server.h > b/include/hw/virtio/vhost-pci-server.h > new file mode 100644 > index 0000000..c9c4a69 > --- /dev/null > +++ b/include/hw/virtio/vhost-pci-server.h > @@ -0,0 +1,45 @@ > +#ifndef QEMU_VHOST_PCI_SERVER_H > +#define QEMU_VHOST_PCI_SERVER_H > + > +#include <uuid/uuid.h> > +#include <linux/vhost.h> > + > +typedef struct PeerVirtqInfo { > + int kickfd; > + int callfd; > + uint32_t vring_num; > + uint16_t last_avail_idx; > + struct vhost_vring_addr addr; > + QLIST_ENTRY(PeerVirtqInfo) node; > +} PeerVirtqInfo; > + > +typedef struct PeerConnectionTable { > + char dev_id[30]; > + uuid_t uuid; > + uint16_t virtio_id; > + uint32_t bar_id; > + MemoryRegion *bar_mr; > + uint64_t bar_map_offset; > + uint64_t peer_feature_bits; > + void *opaque; > + uint16_t vq_num; > + QLIST_HEAD(, PeerVirtqInfo) vq_list; > +} 
PeerConnectionTable; > + > +typedef struct VhostPCIServer { > + CharDriverState *chr; > + uint64_t protocol_features; > + unsigned long *old_conn_bitmap; > + /* a table indexed by the peer connection id */ > + PeerConnectionTable *peer_table; > +} VhostPCIServer; > + > +extern VhostPCIServer *vp_server; > + > +extern int vhost_pci_server_init(QemuOpts *opts); > + > +extern int vhost_pci_server_cleanup(void); > + > +extern PeerConnectionTable *vp_server_find_table_ent(const char *dev_id); > + > +#endif > diff --git a/include/hw/virtio/vhost-user.h > b/include/hw/virtio/vhost-user.h > new file mode 100644 > index 0000000..794a8d8 > --- /dev/null > +++ b/include/hw/virtio/vhost-user.h > @@ -0,0 +1,110 @@ > +#ifndef VHOST_USER_H > +#define VHOST_USER_H > + > +#include <linux/vhost.h> > +#include <uuid/uuid.h> > + > +#define VHOST_MEMORY_MAX_NREGIONS 8 > +#define VHOST_USER_F_PROTOCOL_FEATURES 30 > + > +enum VhostUserProtocolFeature { > + VHOST_USER_PROTOCOL_F_MQ = 0, > + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, > + VHOST_USER_PROTOCOL_F_RARP = 2, > + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, > + VHOST_USER_PROTOCOL_F_VHOST_PCI =4, > + > + VHOST_USER_PROTOCOL_F_MAX > +}; > + > +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << > VHOST_USER_PROTOCOL_F_MAX) - 1) > + > +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) > | \ > + (1ULL << > VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \ > + (1ULL << > VHOST_USER_PROTOCOL_F_RARP)) | \ > + (1ULL << > VHOST_USER_PROTOCOL_F_VHOST_PCI) > + > +typedef enum VhostUserRequest { > + VHOST_USER_NONE = 0, > + VHOST_USER_GET_FEATURES = 1, > + VHOST_USER_SET_FEATURES = 2, > + VHOST_USER_SET_OWNER = 3, > + VHOST_USER_RESET_OWNER = 4, > + VHOST_USER_SET_MEM_TABLE = 5, > + VHOST_USER_SET_LOG_BASE = 6, > + VHOST_USER_SET_LOG_FD = 7, > + VHOST_USER_SET_VRING_NUM = 8, > + VHOST_USER_SET_VRING_ADDR = 9, > + VHOST_USER_SET_VRING_BASE = 10, > + VHOST_USER_GET_VRING_BASE = 11, > + VHOST_USER_SET_VRING_KICK = 12, > + VHOST_USER_SET_VRING_CALL = 
13, > + VHOST_USER_SET_VRING_ERR = 14, > + VHOST_USER_GET_PROTOCOL_FEATURES = 15, > + VHOST_USER_SET_PROTOCOL_FEATURES = 16, > + VHOST_USER_GET_QUEUE_NUM = 17, > + VHOST_USER_SET_VRING_ENABLE = 18, > + VHOST_USER_SEND_RARP = 19, > + VHOST_USER_GET_CONN_ID = 20, > + VHOST_USER_SET_DEV_INFO = 21, > + VHOST_USER_SET_PEER_CONNECTION = 22, > + VHOST_USER_MAX > +} VhostUserRequest; > + > +typedef struct VhostUserMemoryRegion { > + uint64_t guest_phys_addr; > + uint64_t memory_size; > + uint64_t userspace_addr; > + uint64_t mmap_offset; > +} VhostUserMemoryRegion; > + > +typedef struct VhostUserMemory { > + uint32_t nregions; > + uint32_t padding; > + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; > +} VhostUserMemory; > + > +typedef struct VhostUserLog { > + uint64_t mmap_size; > + uint64_t mmap_offset; > +} VhostUserLog; > + > +typedef struct DeviceInfo { > + uuid_t uuid; > + uint16_t virtio_id; > +} DeviceInfo; > + > +typedef struct VhostUserMsg { > + VhostUserRequest request; > + > +#define VHOST_USER_VERSION_MASK (0x3) > +#define VHOST_USER_REPLY_MASK (0x1<<2) > +#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) > + uint32_t flags; > + uint32_t size; /* the following payload size */ > + uint64_t conn_id; > + union { > +#define VHOST_USER_VRING_IDX_MASK (0xff) > +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) > + uint64_t u64; > + struct vhost_vring_state state; > + struct vhost_vring_addr addr; > + VhostUserMemory memory; > + VhostUserLog log; > + DeviceInfo dev_info; > + } payload; > +} QEMU_PACKED VhostUserMsg; > + > +static VhostUserMsg m __attribute__ ((unused)); > +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ > + + sizeof(m.flags) \ > + + sizeof(m.size)) \ > + + sizeof(m.conn_id) > + > +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) > + > +/* The version of the protocol we support */ > +#define VHOST_USER_VERSION (0x2) > + > +#endif > + > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > index ee7c760..7f8b25c 100644 > 
--- a/include/sysemu/sysemu.h > +++ b/include/sysemu/sysemu.h > @@ -244,5 +244,6 @@ extern QemuOptsList qemu_netdev_opts; > extern QemuOptsList qemu_net_opts; > extern QemuOptsList qemu_global_opts; > extern QemuOptsList qemu_mon_opts; > +extern QemuOptsList qemu_vhost_pci_server_opts; > > #endif > diff --git a/qemu-options.hx b/qemu-options.hx > index a71aaf8..1fdb820 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -3968,6 +3968,10 @@ contents of @code{iv.b64} to the second secret > > ETEXI > > +DEF("vhost-pci-server", HAS_ARG, QEMU_OPTION_vhost_pci_server, > + "-vhost-pci-server socket,chrdev={id}\n" > + " creates a vhost-pci-server", > + QEMU_ARCH_I386) > > HXCOMM This is the last statement. Insert new options before this line! > STEXI > diff --git a/vl.c b/vl.c > index b3c80d5..c1f038d 100644 > --- a/vl.c > +++ b/vl.c > @@ -121,6 +121,7 @@ int main(int argc, char **argv) > #include "crypto/init.h" > #include "sysemu/replay.h" > #include "qapi/qmp/qerror.h" > +#include "hw/virtio/vhost-pci-server.h" > > #define MAX_VIRTIO_CONSOLES 1 > #define MAX_SCLP_CONSOLES 1 > @@ -178,6 +179,7 @@ bool boot_strict; > uint8_t *boot_splash_filedata; > size_t boot_splash_filedata_size; > uint8_t qemu_extra_params_fw[2]; > +bool vhost_pci_server_enabled; > > int icount_align_option; > > @@ -2980,6 +2982,7 @@ int main(int argc, char **argv, char **envp) > qemu_add_drive_opts(&qemu_drive_opts); > qemu_add_opts(&qemu_chardev_opts); > qemu_add_opts(&qemu_device_opts); > + qemu_add_opts(&qemu_vhost_pci_server_opts); > qemu_add_opts(&qemu_netdev_opts); > qemu_add_opts(&qemu_net_opts); > qemu_add_opts(&qemu_rtc_opts); > @@ -3970,6 +3973,13 @@ int main(int argc, char **argv, char **envp) > exit(1); > } > break; > + case QEMU_OPTION_vhost_pci_server: > + vhost_pci_server_enabled = true; > + opts = > qemu_opts_parse_noisily(qemu_find_opts("vhost-pci-server"), optarg, false); > + if (!opts) { > + exit(1); > + } > + break; > default: > os_parse_cmd_args(popt->index, optarg); > } 
> @@ -4479,6 +4489,16 @@ int main(int argc, char **argv, char **envp) > exit(1); > } > > + /* check if the vhost-pci-server is enabled */ > + if (vhost_pci_server_enabled) { > + int ret; > + ret = vhost_pci_server_init(qemu_opts_find( > + qemu_find_opts("vhost-pci-server"), > + NULL)); > + if (ret < 0) > + exit(1); > + } > + > /* init USB devices */ > if (machine_usb(current_machine)) { > if (foreach_device_config(DEV_USB, usb_parse) < 0) > @@ -4607,6 +4627,12 @@ int main(int argc, char **argv, char **envp) > bdrv_close_all(); > pause_all_vcpus(); > res_free(); > + if (vhost_pci_server_enabled) { > + int ret; > + ret = vhost_pci_server_cleanup(); > + if (ret < 0) > + exit(1); > + } > #ifdef CONFIG_TPM > tpm_cleanup(); > #endif > -- > 2.7.4 > > -- Marc-André Lureau
On 11/10/2016 07:36 PM, Marc-André Lureau wrote: > Hi > > On Thu, Nov 10, 2016 at 6:47 AM Wei Wang <wei.w.wang@intel.com > <mailto:wei.w.wang@intel.com>> wrote: > > This is the slave part of vhost-user implemented in QEMU, with an > extension > to support vhost-pci. > > > Instead of implementing "another vhost-user slave", it would be worth > investigating using libvhost-user instead > (https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg03990.html). > This is just a suggestion, it is quite fine for vhost-pci to have its > own smaller/specific vhost-user slave implementation (without virtio > rings handlings etc). (libvhost-user is also very young, not yet in > qemu, we should be able shape it for vhost-pci needs) Thanks for the suggestion. It looks possible, but I will need to investigate it further. libvhost-user is still at an initial stage and not in QEMU yet, so I think we can probably have vhost-pci as an independent slave for now - this will not complicate libvhost-user while it is still in its initial stage (it might not be good to give it a rather complex design at the beginning), and on the other hand, the draft of the vhost-pci based slave implementation is already there. We can merge them, if necessary, once they are both stable in QEMU. What do you think? Best, Wei
diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 3e2b175..e44feb8 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -2,6 +2,7 @@ common-obj-y += virtio-rng.o common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o common-obj-y += virtio-bus.o common-obj-y += virtio-mmio.o +common-obj-y += vhost-pci-server.o obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o diff --git a/hw/virtio/vhost-pci-server.c b/hw/virtio/vhost-pci-server.c new file mode 100644 index 0000000..6ce8516 --- /dev/null +++ b/hw/virtio/vhost-pci-server.c @@ -0,0 +1,469 @@ +/* + * Vhost-pci server + * + * Copyright Intel Corp. 2016 + * + * Authors: + * Wei Wang <wei.w.wang@intel.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <qemu/osdep.h> +#include <qemu/thread.h> +#include <qemu/main-loop.h> +#include <qemu/bitops.h> +#include <qemu/bitmap.h> +#include <qemu/sockets.h> +#include <linux/virtio_net.h> +#include "sysemu/char.h" +#include "qapi/error.h" +#include "hw/virtio/vhost-pci-server.h" +#include "qemu/option.h" +#include "monitor/qdev.h" +#include "hw/virtio/vhost-user.h" +#include "hw/qdev.h" + +#define VHOST_PCI_FEATURE_BITS (1ULL << VIRTIO_F_VERSION_1) + +#define VHOST_PCI_NET_FEATURE_BITS (1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_MQ) + +#define VHOST_USER_SET_PEER_CONNECTION_OFF 0 +#define VHOST_USER_SET_PEER_CONNECTION_ON 1 +#define VHOST_USER_SET_PEER_CONNECTION_INIT 2 + +VhostPCIServer *vp_server; + +QemuOptsList qemu_vhost_pci_server_opts = { + .name = "vhost-pci-server", + .implied_opt_name = "chardev", + .head = QTAILQ_HEAD_INITIALIZER(qemu_vhost_pci_server_opts.head), + .desc = { + /* + * no elements => accept any + * sanity checking will happen later + * when setting device properties + */ + { /* end of list */ } + }, +}; + +static int 
vhost_pci_server_write(CharDriverState *chr, VhostUserMsg *msg) +{ + int size = msg->size + VHOST_USER_HDR_SIZE; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags |= VHOST_USER_VERSION; + + return qemu_chr_fe_write_all_n(chr, msg->conn_id, + (const uint8_t *)msg, size) == size ? 0 : -1; +} + +PeerConnectionTable *vp_server_find_table_ent(const char *dev_id) +{ + int i; + PeerConnectionTable *ent; + uint64_t max_connections = vp_server->chr->max_connections; + + for (i = 0; i < max_connections; i++) { + ent = &vp_server->peer_table[i]; + if (!strcmp(dev_id, ent->dev_id)) + return ent; + } + return NULL; +} + +static void vhost_pci_init_peer_table(uint64_t id) +{ + PeerConnectionTable *ent = &vp_server->peer_table[id]; + + ent->peer_feature_bits |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + QLIST_INIT(&ent->vq_list); + ent->vq_num = 0; +} + +static int vhost_pci_get_conn_id(CharDriverState *chr, VhostUserMsg *msg) +{ + unsigned long *conn_bitmap = chr->conn_bitmap; + unsigned long *old_conn_bitmap = vp_server->old_conn_bitmap; + uint64_t nbits = chr->max_connections; + uint64_t id; + int r; + + bitmap_xor(old_conn_bitmap, old_conn_bitmap, conn_bitmap, (long)nbits); + + for (id = find_first_bit(old_conn_bitmap, nbits); id < nbits; + id = find_next_bit(old_conn_bitmap, nbits, id + 1)) { + vhost_pci_init_peer_table(id); + msg->conn_id = id; + msg->payload.u64 = id; + msg->size = sizeof(msg->payload.u64); + msg->flags |= VHOST_USER_REPLY_MASK; + r = vhost_pci_server_write(chr, msg); + } + bitmap_copy(old_conn_bitmap, conn_bitmap, (long)nbits); + + return r; +} + +static int vhost_pci_get_peer_features(CharDriverState *chr, VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + msg->payload.u64 = ent->peer_feature_bits; + msg->size = sizeof(msg->payload.u64); + msg->flags |= VHOST_USER_REPLY_MASK; + return vhost_pci_server_write(chr, msg); +} + +static int vhost_pci_get_queue_num(CharDriverState *chr, 
VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + switch (ent->virtio_id) { + case VIRTIO_ID_NET: + msg->payload.u64 = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; + break; + default: + printf("%s: device type not supported yet..\n", __func__); + } + msg->size = sizeof(msg->payload.u64); + msg->flags |= VHOST_USER_REPLY_MASK; + return vhost_pci_server_write(chr, msg); +} + +static int vhost_pci_get_protocol_features(CharDriverState *chr, VhostUserMsg *msg) +{ + msg->payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg->size = sizeof(msg->payload.u64); + msg->flags |= VHOST_USER_REPLY_MASK; + return vhost_pci_server_write(chr, msg); +} + +static void vhost_pci_set_protocol_features(VhostUserMsg *msg) +{ + vp_server->protocol_features = msg->payload.u64; +} + +static int vhost_pci_device_create(uint64_t conn_id) +{ + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; + Error *local_err = NULL; + QemuOpts *opts; + DeviceState *dev; + char params[50]; + + switch (ent->virtio_id) { + case VIRTIO_ID_NET: + sprintf(params, "driver=vhost-pci-net-pci,id=vhost-pci-%ld", conn_id); + sprintf(ent->dev_id, "vhost-pci-%ld", conn_id); + break; + default: + printf("%s: device type not supported yet..\n", __func__); + } + + opts = qemu_opts_parse_noisily(qemu_find_opts("device"), params, true); + dev = qdev_device_add(opts, &local_err); + if (!dev) { + qemu_opts_del(opts); + return -1; + } + object_unref(OBJECT(dev)); + return 0; +} + +static void vhost_pci_set_device_info(VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + DeviceInfo *info = &msg->payload.dev_info; + + memcpy(ent->uuid, info->uuid, sizeof(uuid_t)); + ent->virtio_id = info->virtio_id; + switch (ent->virtio_id) { + case VIRTIO_ID_NET: + ent->peer_feature_bits |= (VHOST_PCI_FEATURE_BITS | VHOST_PCI_NET_FEATURE_BITS); + break; + default: + printf("%s: device type not supported yet..\n", __func__); + } +} + +static void 
vhost_pci_set_peer_feature_bits(VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + + ent->peer_feature_bits = msg->payload.u64; +} + +static uint64_t vhost_pci_peer_mem_size_get(VhostUserMemory *peer_mem) +{ + int i; + uint64_t total_size; + uint32_t nregions = peer_mem->nregions; + VhostUserMemoryRegion *regions = peer_mem->regions; + + for (i = 0; i < nregions; i++) { + total_size += regions[i].memory_size; + } + + return total_size; +} + +static int vhost_pci_set_mem_table(uint64_t conn_id, VhostUserMemory *peer_mem, int *fds) +{ + int i; + void *mr_qva; + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; + uint32_t nregions = peer_mem->nregions; + VhostUserMemoryRegion *peer_mr = peer_mem->regions; + MemoryRegion *bar_mr = g_malloc(sizeof(MemoryRegion)); + MemoryRegion *mr = g_malloc(nregions * sizeof(MemoryRegion)); + uint64_t bar_size = 2 * vhost_pci_peer_mem_size_get(peer_mem); + uint64_t bar_map_offset = 0; + + bar_size = pow2ceil(bar_size); + memory_region_init(bar_mr, NULL, "Peer Memory", bar_size); + + for (i = 0; i < nregions; i++) { + mr_qva = mmap(NULL, peer_mr[i].memory_size + peer_mr[i].mmap_offset, + PROT_READ | PROT_READ, MAP_SHARED, fds[i], 0); + if (mr_qva == MAP_FAILED) { + printf("%s called: map failed \n", __func__); + return -1; + } + mr_qva += peer_mr[i].mmap_offset; + memory_region_init_ram_ptr(&mr[i], NULL, "Peer Memory", peer_mr[i].memory_size, mr_qva); + memory_region_add_subregion(bar_mr, bar_map_offset, &mr[i]); + bar_map_offset += peer_mr[i].memory_size; + } + ent->bar_mr = bar_mr; + ent->bar_map_offset = bar_map_offset; + + return 0; +} + +static void vhost_pci_alloc_peer_vring_info(uint64_t conn_id) +{ + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; + PeerVirtqInfo *virtq_info = g_malloc0(sizeof(PeerVirtqInfo)); + QLIST_INSERT_HEAD(&ent->vq_list, virtq_info, node); + ent->vq_num++; +} + +static void vhost_pci_set_vring_num(VhostUserMsg *msg) +{ + PeerConnectionTable 
*ent = &vp_server->peer_table[msg->conn_id]; + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); + + virtq_info->vring_num = msg->payload.u64; +} + +static void vhost_pci_set_vring_base(VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); + + virtq_info->last_avail_idx = msg->payload.u64; +} + +static void vhost_pci_set_vring_addr(VhostUserMsg *msg) +{ + PeerConnectionTable *ent = &vp_server->peer_table[msg->conn_id]; + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); + memcpy(&virtq_info->addr, &msg->payload.addr, + sizeof(struct vhost_vring_addr)); +} + +static void vhost_pci_set_vring_kick(uint64_t conn_id, int fd) +{ + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); + if (!virtq_info) + virtq_info->kickfd = fd; +} + +static void vhost_pci_set_vring_call(uint64_t conn_id, int fd) +{ + PeerConnectionTable *ent = &vp_server->peer_table[conn_id]; + PeerVirtqInfo *virtq_info = QLIST_FIRST(&ent->vq_list); + if (virtq_info) + virtq_info->callfd = fd; +} + +static void vhost_pci_set_peer_connection(VhostUserMsg *msg) +{ + uint64_t cmd = msg->payload.u64; + uint64_t conn_id = msg->conn_id; + + switch (cmd) { + case VHOST_USER_SET_PEER_CONNECTION_INIT: + vhost_pci_device_create(conn_id); + break; + default: + printf("%s called: cmd %lu not supported yet \n", __func__, cmd); + } +} + +static void vhost_pci_server_read(void *opaque, const uint8_t *buf, int size) +{ + VhostUserMsg msg; + uint8_t *p = (uint8_t *) &msg; + CharDriverState *chr = (CharDriverState *)opaque; + int fds[8], fd_num; + + if (size != VHOST_USER_HDR_SIZE) { + printf("Wrong message size received %d\n", size); + return; + } + memcpy(p, buf, VHOST_USER_HDR_SIZE); + + if (msg.size) { + p += VHOST_USER_HDR_SIZE; + size = qemu_chr_fe_read_all_n(chr, msg.conn_id, p, msg.size); + if (size != msg.size) { + printf("Wrong message size 
received %d != %d\n", + size, msg.size); + return; + } + } + + if (msg.request > VHOST_USER_MAX) + printf("vhost read incorrect msg \n"); + + switch(msg.request) { + case VHOST_USER_GET_CONN_ID: + vhost_pci_get_conn_id(chr, &msg); + break; + case VHOST_USER_GET_FEATURES: + vhost_pci_get_peer_features(chr, &msg); + break; + case VHOST_USER_GET_PROTOCOL_FEATURES: + vhost_pci_get_protocol_features(chr, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + vhost_pci_set_protocol_features(&msg); + break; + case VHOST_USER_SET_DEV_INFO: + vhost_pci_set_device_info(&msg); + break; + case VHOST_USER_GET_QUEUE_NUM: + vhost_pci_get_queue_num(chr, &msg); + break; + case VHOST_USER_SET_OWNER: + break; + case VHOST_USER_SET_FEATURES: + vhost_pci_set_peer_feature_bits(&msg); + break; + case VHOST_USER_SET_VRING_NUM: + vhost_pci_alloc_peer_vring_info(msg.conn_id); + vhost_pci_set_vring_num(&msg); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_pci_set_vring_base(&msg); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_pci_set_vring_addr(&msg); + break; + case VHOST_USER_SET_VRING_KICK: + /* consume the fd */ + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1); + printf("VHOST_USER_SET_VRING_KICK called:..kickfd = %d\n", fds[0]); + vhost_pci_set_vring_kick(msg.conn_id, fds[0]); + /* + * This is a non-blocking eventfd. + * The receive function forces it to be blocking, + * so revert it back to non-blocking. + */ + qemu_set_nonblock(fds[0]); + break; + case VHOST_USER_SET_VRING_CALL: + /* consume the fd */ + qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, fds, 1); + vhost_pci_set_vring_call(msg.conn_id, fds[0]); + /* + * This is a non-blocking eventfd. + * The receive function forces it to be blocking, + * so revert it back to non-blocking. 
+ */ + qemu_set_nonblock(fds[0]); + break; + case VHOST_USER_SET_MEM_TABLE: + fd_num = qemu_chr_fe_get_msgfds_n(chr, msg.conn_id, + fds, sizeof(fds) / sizeof(int)); + printf("VHOST_USER_SET_MEM_TABLE: fd = %d \n", fd_num); + vhost_pci_set_mem_table(msg.conn_id, &msg.payload.memory, fds); + break; + case VHOST_USER_SET_PEER_CONNECTION: + vhost_pci_set_peer_connection(&msg); + break; + default: + printf("default called..msg->request = %d \n", msg.request); + break; + } +} + +static int vhost_pci_server_can_read(void *opaque) +{ + return VHOST_USER_HDR_SIZE; +} + +static void vhost_pci_server_event(void *opaque, int event) +{ + switch (event) { + case CHR_EVENT_OPENED: + printf("vhost_pci_server_event called.. \n"); + break; + case CHR_EVENT_CLOSED: + printf("vhost_pci_server_event called: event close..\n"); + break; + } +} + +static CharDriverState *vhost_pci_server_parse_chardev(const char *id) +{ + CharDriverState *chr = qemu_chr_find(id); + if (chr == NULL) { + printf("chardev \"%s\" not found", id); + return NULL; + } + + qemu_chr_fe_claim_no_fail(chr); + + return chr; +} + +int vhost_pci_server_init(QemuOpts *opts) +{ + CharDriverState *chr; + const char *chardev_id = qemu_opt_get(opts, "chardev"); + uint64_t max_connections; + + vp_server = (VhostPCIServer *)malloc(sizeof(VhostPCIServer)); + + chr = vhost_pci_server_parse_chardev(chardev_id); + if (!chr) { + return -1; + } + max_connections = chr->max_connections; + + qemu_chr_add_handlers(chr, vhost_pci_server_can_read, vhost_pci_server_read, vhost_pci_server_event, chr); + + vp_server->chr = chr; + + vp_server->peer_table = (PeerConnectionTable *)g_malloc0(max_connections * sizeof(PeerConnectionTable)); + + vp_server->old_conn_bitmap = bitmap_new(max_connections); + + return 0; +} + +int vhost_pci_server_cleanup(void) +{ + free(vp_server); + printf("vhost_pci_server_cleanup called.. 
\n"); + return 0; +} diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b57454a..bce5181 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -13,6 +13,7 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "hw/virtio/virtio-net.h" +#include "hw/virtio/vhost-user.h" #include "sysemu/char.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" @@ -24,91 +25,6 @@ #include <sys/un.h> #include <linux/vhost.h> -#define VHOST_MEMORY_MAX_NREGIONS 8 -#define VHOST_USER_F_PROTOCOL_FEATURES 30 - -enum VhostUserProtocolFeature { - VHOST_USER_PROTOCOL_F_MQ = 0, - VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, - VHOST_USER_PROTOCOL_F_RARP = 2, - VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, - - VHOST_USER_PROTOCOL_F_MAX -}; - -#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) - -typedef enum VhostUserRequest { - VHOST_USER_NONE = 0, - VHOST_USER_GET_FEATURES = 1, - VHOST_USER_SET_FEATURES = 2, - VHOST_USER_SET_OWNER = 3, - VHOST_USER_RESET_OWNER = 4, - VHOST_USER_SET_MEM_TABLE = 5, - VHOST_USER_SET_LOG_BASE = 6, - VHOST_USER_SET_LOG_FD = 7, - VHOST_USER_SET_VRING_NUM = 8, - VHOST_USER_SET_VRING_ADDR = 9, - VHOST_USER_SET_VRING_BASE = 10, - VHOST_USER_GET_VRING_BASE = 11, - VHOST_USER_SET_VRING_KICK = 12, - VHOST_USER_SET_VRING_CALL = 13, - VHOST_USER_SET_VRING_ERR = 14, - VHOST_USER_GET_PROTOCOL_FEATURES = 15, - VHOST_USER_SET_PROTOCOL_FEATURES = 16, - VHOST_USER_GET_QUEUE_NUM = 17, - VHOST_USER_SET_VRING_ENABLE = 18, - VHOST_USER_SEND_RARP = 19, - VHOST_USER_MAX -} VhostUserRequest; - -typedef struct VhostUserMemoryRegion { - uint64_t guest_phys_addr; - uint64_t memory_size; - uint64_t userspace_addr; - uint64_t mmap_offset; -} VhostUserMemoryRegion; - -typedef struct VhostUserMemory { - uint32_t nregions; - uint32_t padding; - VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; -} VhostUserMemory; - -typedef struct VhostUserLog { - uint64_t mmap_size; - uint64_t mmap_offset; -} VhostUserLog; - -typedef 
struct VhostUserMsg { - VhostUserRequest request; - -#define VHOST_USER_VERSION_MASK (0x3) -#define VHOST_USER_REPLY_MASK (0x1<<2) -#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) - uint32_t flags; - uint32_t size; /* the following payload size */ - union { -#define VHOST_USER_VRING_IDX_MASK (0xff) -#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) - uint64_t u64; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - VhostUserMemory memory; - VhostUserLog log; - } payload; -} QEMU_PACKED VhostUserMsg; - -static VhostUserMsg m __attribute__ ((unused)); -#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ - + sizeof(m.flags) \ - + sizeof(m.size)) - -#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) - -/* The version of the protocol we support */ -#define VHOST_USER_VERSION (0x1) - static bool ioeventfd_enabled(void) { return kvm_enabled() && kvm_eventfds_enabled(); diff --git a/include/hw/virtio/vhost-pci-server.h b/include/hw/virtio/vhost-pci-server.h new file mode 100644 index 0000000..c9c4a69 --- /dev/null +++ b/include/hw/virtio/vhost-pci-server.h @@ -0,0 +1,45 @@ +#ifndef QEMU_VHOST_PCI_SERVER_H +#define QEMU_VHOST_PCI_SERVER_H + +#include <uuid/uuid.h> +#include <linux/vhost.h> + +typedef struct PeerVirtqInfo { + int kickfd; + int callfd; + uint32_t vring_num; + uint16_t last_avail_idx; + struct vhost_vring_addr addr; + QLIST_ENTRY(PeerVirtqInfo) node; +} PeerVirtqInfo; + +typedef struct PeerConnectionTable { + char dev_id[30]; + uuid_t uuid; + uint16_t virtio_id; + uint32_t bar_id; + MemoryRegion *bar_mr; + uint64_t bar_map_offset; + uint64_t peer_feature_bits; + void *opaque; + uint16_t vq_num; + QLIST_HEAD(, PeerVirtqInfo) vq_list; +} PeerConnectionTable; + +typedef struct VhostPCIServer { + CharDriverState *chr; + uint64_t protocol_features; + unsigned long *old_conn_bitmap; + /* a table indexed by the peer connection id */ + PeerConnectionTable *peer_table; +} VhostPCIServer; + +extern VhostPCIServer *vp_server; + +extern int 
vhost_pci_server_init(QemuOpts *opts); + +extern int vhost_pci_server_cleanup(void); + +extern PeerConnectionTable *vp_server_find_table_ent(const char *dev_id); + +#endif diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h new file mode 100644 index 0000000..794a8d8 --- /dev/null +++ b/include/hw/virtio/vhost-user.h @@ -0,0 +1,110 @@ +#ifndef VHOST_USER_H +#define VHOST_USER_H + +#include <linux/vhost.h> +#include <uuid/uuid.h> + +#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_VHOST_PCI =4, + + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) | \ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) | \ + (1ULL << VHOST_USER_PROTOCOL_F_VHOST_PCI) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_GET_CONN_ID = 20, + VHOST_USER_SET_DEV_INFO = 21, + VHOST_USER_SET_PEER_CONNECTION = 22, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t 
memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct DeviceInfo { + uuid_t uuid; + uint16_t virtio_id; +} DeviceInfo; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) +#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) + uint32_t flags; + uint32_t size; /* the following payload size */ + uint64_t conn_id; + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + DeviceInfo dev_info; + } payload; +} QEMU_PACKED VhostUserMsg; + +static VhostUserMsg m __attribute__ ((unused)); +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ + + sizeof(m.flags) \ + + sizeof(m.size)) \ + + sizeof(m.conn_id) + +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x2) + +#endif + diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index ee7c760..7f8b25c 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -244,5 +244,6 @@ extern QemuOptsList qemu_netdev_opts; extern QemuOptsList qemu_net_opts; extern QemuOptsList qemu_global_opts; extern QemuOptsList qemu_mon_opts; +extern QemuOptsList qemu_vhost_pci_server_opts; #endif diff --git a/qemu-options.hx b/qemu-options.hx index a71aaf8..1fdb820 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3968,6 +3968,10 @@ contents of @code{iv.b64} to the second secret ETEXI +DEF("vhost-pci-server", HAS_ARG, QEMU_OPTION_vhost_pci_server, + "-vhost-pci-server 
socket,chrdev={id}\n" + " creates a vhost-pci-server", + QEMU_ARCH_I386) HXCOMM This is the last statement. Insert new options before this line! STEXI diff --git a/vl.c b/vl.c index b3c80d5..c1f038d 100644 --- a/vl.c +++ b/vl.c @@ -121,6 +121,7 @@ int main(int argc, char **argv) #include "crypto/init.h" #include "sysemu/replay.h" #include "qapi/qmp/qerror.h" +#include "hw/virtio/vhost-pci-server.h" #define MAX_VIRTIO_CONSOLES 1 #define MAX_SCLP_CONSOLES 1 @@ -178,6 +179,7 @@ bool boot_strict; uint8_t *boot_splash_filedata; size_t boot_splash_filedata_size; uint8_t qemu_extra_params_fw[2]; +bool vhost_pci_server_enabled; int icount_align_option; @@ -2980,6 +2982,7 @@ int main(int argc, char **argv, char **envp) qemu_add_drive_opts(&qemu_drive_opts); qemu_add_opts(&qemu_chardev_opts); qemu_add_opts(&qemu_device_opts); + qemu_add_opts(&qemu_vhost_pci_server_opts); qemu_add_opts(&qemu_netdev_opts); qemu_add_opts(&qemu_net_opts); qemu_add_opts(&qemu_rtc_opts); @@ -3970,6 +3973,13 @@ int main(int argc, char **argv, char **envp) exit(1); } break; + case QEMU_OPTION_vhost_pci_server: + vhost_pci_server_enabled = true; + opts = qemu_opts_parse_noisily(qemu_find_opts("vhost-pci-server"), optarg, false); + if (!opts) { + exit(1); + } + break; default: os_parse_cmd_args(popt->index, optarg); } @@ -4479,6 +4489,16 @@ int main(int argc, char **argv, char **envp) exit(1); } + /* check if the vhost-pci-server is enabled */ + if (vhost_pci_server_enabled) { + int ret; + ret = vhost_pci_server_init(qemu_opts_find( + qemu_find_opts("vhost-pci-server"), + NULL)); + if (ret < 0) + exit(1); + } + /* init USB devices */ if (machine_usb(current_machine)) { if (foreach_device_config(DEV_USB, usb_parse) < 0) @@ -4607,6 +4627,12 @@ int main(int argc, char **argv, char **envp) bdrv_close_all(); pause_all_vcpus(); res_free(); + if (vhost_pci_server_enabled) { + int ret; + ret = vhost_pci_server_cleanup(); + if (ret < 0) + exit(1); + } #ifdef CONFIG_TPM tpm_cleanup(); #endif
This is the slave part of vhost-user implemented in QEMU, with an extension to support vhost-pci. Signed-off-by: Wei Wang <wei.w.wang@intel.com> --- hw/virtio/Makefile.objs | 1 + hw/virtio/vhost-pci-server.c | 469 +++++++++++++++++++++++++++++++++++ hw/virtio/vhost-user.c | 86 +------ include/hw/virtio/vhost-pci-server.h | 45 ++++ include/hw/virtio/vhost-user.h | 110 ++++++++ include/sysemu/sysemu.h | 1 + qemu-options.hx | 4 + vl.c | 26 ++ 8 files changed, 657 insertions(+), 85 deletions(-) create mode 100644 hw/virtio/vhost-pci-server.c create mode 100644 include/hw/virtio/vhost-pci-server.h create mode 100644 include/hw/virtio/vhost-user.h