From patchwork Wed Sep 19 06:28:31 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 10605371 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B68E215A6 for ; Wed, 19 Sep 2018 06:31:42 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A45202B635 for ; Wed, 19 Sep 2018 06:31:42 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 98D862B649; Wed, 19 Sep 2018 06:31:42 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id BF8A52B635 for ; Wed, 19 Sep 2018 06:31:40 +0000 (UTC) Received: from localhost ([::1]:43865 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2W1U-0007qe-1h for patchwork-qemu-devel@patchwork.kernel.org; Wed, 19 Sep 2018 02:31:40 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45309) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2Vzk-0006dI-S0 for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:54 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1g2Vzg-0002Z8-Cj for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:52 -0400 Received: from mga11.intel.com ([192.55.52.93]:1973) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1g2Vzg-0002RZ-4X for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:48 
-0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 18 Sep 2018 23:29:43 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,392,1531810800"; d="scan'208";a="71164017" Received: from btwcube1.sh.intel.com ([10.67.104.151]) by fmsmga007.fm.intel.com with ESMTP; 18 Sep 2018 23:29:41 -0700 From: Tiwei Bie To: mst@redhat.com, alex.williamson@redhat.com, jasowang@redhat.com, qemu-devel@nongnu.org Date: Wed, 19 Sep 2018 14:28:31 +0800 Message-Id: <20180919062834.30103-2-tiwei.bie@intel.com> X-Mailer: git-send-email 2.18.0 In-Reply-To: <20180919062834.30103-1-tiwei.bie@intel.com> References: <20180919062834.30103-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 192.55.52.93 Subject: [Qemu-devel] [RFC v2 1/4] vfio: support creating VFIOContainer directly X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: tiwei.bie@intel.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP This patch introduces several APIs to support creating VFIOContainer from VFIO container fd and AddressSpace directly. These containers will be marked as external, and won't be used by the VFIO passthru code. This is useful when the container fd is opened and shared by another process and that process wants to do the IOMMU programming based on a QEMU device's DMA address space. 
Signed-off-by: Tiwei Bie --- hw/vfio/common.c | 164 +++++++++++++++++++++++++++++++++- hw/vfio/trace-events | 2 + include/hw/vfio/vfio-common.h | 9 ++ 3 files changed, 174 insertions(+), 1 deletion(-) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 7c185e5a2e..899d1c8f46 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -41,6 +41,7 @@ struct vfio_group_head vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); struct vfio_as_head vfio_address_spaces = QLIST_HEAD_INITIALIZER(vfio_address_spaces); +QemuMutex vfio_address_spaces_lock; #ifdef CONFIG_KVM /* @@ -1043,6 +1044,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, int ret, fd; VFIOAddressSpace *space; + qemu_mutex_lock(&vfio_address_spaces_lock); + space = vfio_get_address_space(as); /* @@ -1073,10 +1076,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, qemu_balloon_inhibit(true); QLIST_FOREACH(container, &space->containers, next) { + if (container->external) { + continue; + } if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_kvm_device_add_group(group); + qemu_mutex_unlock(&vfio_address_spaces_lock); return 0; } } @@ -1249,6 +1256,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, container->initialized = true; + qemu_mutex_unlock(&vfio_address_spaces_lock); return 0; listener_release_exit: QLIST_REMOVE(group, container_next); @@ -1265,6 +1273,7 @@ close_fd_exit: put_space_exit: qemu_balloon_inhibit(false); vfio_put_address_space(space); + qemu_mutex_unlock(&vfio_address_spaces_lock); return ret; } @@ -1273,6 +1282,8 @@ static void vfio_disconnect_container(VFIOGroup *group) { VFIOContainer *container = group->container; + qemu_mutex_lock(&vfio_address_spaces_lock); + QLIST_REMOVE(group, container_next); group->container = NULL; @@ -1309,6 +1320,147 @@ static void vfio_disconnect_container(VFIOGroup 
*group) vfio_put_address_space(space); } + + qemu_mutex_unlock(&vfio_address_spaces_lock); +} + +/* + * Currently, only TYPE1 IOMMU is supported. + */ +VFIOContainer *vfio_new_container(int container_fd, AddressSpace *as, + Error **errp) +{ + VFIOContainer *container; + int ret, fd; + VFIOAddressSpace *space; + struct vfio_iommu_type1_info info; + hwaddr pgmask; + bool v2; + + trace_vfio_new_container(container_fd); + + qemu_mutex_lock(&vfio_address_spaces_lock); + + space = vfio_get_address_space(as); + + qemu_balloon_inhibit(true); + + fd = container_fd; + if (fd < 0) { + error_setg(errp, "invalid container fd %d", fd); + goto put_space_exit; + } + + ret = ioctl(fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + error_setg(errp, "supported vfio version: %d, " + "reported version: %d", VFIO_API_VERSION, ret); + goto put_space_exit; + } + + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = fd; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); + + if (!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) && + !ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) { + error_setg(errp, "No available IOMMU models"); + goto free_container_exit; + } + v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU); + container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU; + + /* + * FIXME: This assumes that a Type1 IOMMU can map any 64-bit + * IOVA whatsoever. That's not actually true, but the current + * kernel interface doesn't tell us what it can map, and the + * existing Type1 IOMMUs generally support any IOVA we're + * going to actually try in practice. 
+ */ + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); + /* Ignore errors */ + if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { + /* Assume 4k IOVA page size */ + info.iova_pgsizes = 4096; + } + vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); + container->pgsizes = info.iova_pgsizes; + + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; + vfio_dma_unmap(container, 0, (ram_addr_t)-1 & ~pgmask); + + container->external = true; + + QLIST_INIT(&container->group_list); + QLIST_INSERT_HEAD(&space->containers, container, next); + + container->listener = vfio_memory_listener; + + memory_listener_register(&container->listener, container->space->as); + + if (container->error) { + error_setg_errno(errp, -container->error, + "memory listener initialization failed for container"); + goto listener_release_exit; + } + + container->initialized = true; + + qemu_mutex_unlock(&vfio_address_spaces_lock); + return container; + +listener_release_exit: + QLIST_REMOVE(container, next); + vfio_listener_release(container); + +free_container_exit: + g_free(container); + +put_space_exit: + qemu_balloon_inhibit(false); + vfio_put_address_space(space); + qemu_mutex_unlock(&vfio_address_spaces_lock); + + return NULL; +} + +void vfio_free_container(VFIOContainer *container) +{ + VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; + hwaddr pgmask; + + if (!container->external) { + return; + } + + trace_vfio_free_container(container->fd); + + qemu_mutex_lock(&vfio_address_spaces_lock); + + vfio_listener_release(container); + + pgmask = (1ULL << ctz64(container->pgsizes)) - 1; + vfio_dma_unmap(container, 0, (ram_addr_t)-1 & ~pgmask); + + QLIST_REMOVE(container, next); + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu), &giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + + close(container->fd); + 
g_free(container); + + qemu_balloon_inhibit(false); + vfio_put_address_space(space); + + qemu_mutex_unlock(&vfio_address_spaces_lock); } VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) @@ -1601,9 +1753,13 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) { - VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOAddressSpace *space; VFIOContainer *container = NULL; + qemu_mutex_lock(&vfio_address_spaces_lock); + + space = vfio_get_address_space(as); + if (QLIST_EMPTY(&space->containers)) { /* No containers to act on */ goto out; @@ -1620,6 +1776,7 @@ static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) out: vfio_put_address_space(space); + qemu_mutex_unlock(&vfio_address_spaces_lock); return container; } @@ -1639,3 +1796,8 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op) } return vfio_eeh_container_op(container, op); } + +static void __attribute__((__constructor__)) vfio_common_init(void) +{ + qemu_mutex_init(&vfio_address_spaces_lock); +} diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index a85e8662ea..4e123d6cd9 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -98,6 +98,8 @@ vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t si vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 vfio_disconnect_container(int fd) "close container->fd=%d" +vfio_new_container(int fd) "new container->fd=%d" +vfio_free_container(int fd) "free container->fd=%d" vfio_put_group(int fd) "close group->fd=%d" vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" vfio_put_base_device(int fd) "close vdev->fd=%d" diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h index 821def0565..be87a6125a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -73,6 +73,7 @@ typedef struct VFIOContainer { unsigned iommu_type; int error; bool initialized; + bool external; /* Used outside the hw/vfio */ unsigned long pgsizes; /* * This assumes the host IOMMU can support only a single @@ -180,6 +181,7 @@ int vfio_get_device(VFIOGroup *group, const char *name, extern const MemoryRegionOps vfio_region_ops; extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces; +extern QemuMutex vfio_address_spaces_lock; #ifdef CONFIG_LINUX int vfio_get_region_info(VFIODevice *vbasedev, int index, @@ -196,4 +198,11 @@ int vfio_spapr_create_window(VFIOContainer *container, int vfio_spapr_remove_window(VFIOContainer *container, hwaddr offset_within_address_space); +/* + * APIs used by modules outside hw/vfio. + */ +VFIOContainer *vfio_new_container(int container_fd, AddressSpace *as, + Error **errp); +void vfio_free_container(VFIOContainer *container); + #endif /* HW_VFIO_VFIO_COMMON_H */ From patchwork Wed Sep 19 06:28:32 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 10605369 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A1A01112B for ; Wed, 19 Sep 2018 06:31:41 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 8F4F82B647 for ; Wed, 19 Sep 2018 06:31:41 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 818F12B64E; Wed, 19 Sep 2018 06:31:41 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 
required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id E96102B647 for ; Wed, 19 Sep 2018 06:31:40 +0000 (UTC) Received: from localhost ([::1]:43856 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2W1U-0006gB-6Q for patchwork-qemu-devel@patchwork.kernel.org; Wed, 19 Sep 2018 02:31:40 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45347) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2Vzn-0006eX-W8 for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1g2Vzm-0002ek-UC for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:55 -0400 Received: from mga11.intel.com ([192.55.52.93]:1984) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1g2Vzm-0002dt-L6 for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:54 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 18 Sep 2018 23:29:53 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,392,1531810800"; d="scan'208";a="71164021" Received: from btwcube1.sh.intel.com ([10.67.104.151]) by fmsmga007.fm.intel.com with ESMTP; 18 Sep 2018 23:29:43 -0700 From: Tiwei Bie To: mst@redhat.com, alex.williamson@redhat.com, jasowang@redhat.com, qemu-devel@nongnu.org Date: Wed, 19 Sep 2018 14:28:32 +0800 Message-Id: <20180919062834.30103-3-tiwei.bie@intel.com> X-Mailer: git-send-email 2.18.0 In-Reply-To: <20180919062834.30103-1-tiwei.bie@intel.com> References: <20180919062834.30103-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS 
details not recognized. X-Received-From: 192.55.52.93 Subject: [Qemu-devel] [RFC v2 2/4] vhost-user: support programming VFIO container in master X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: tiwei.bie@intel.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP This patch introduces a slave message to allow slave to share its VFIO container fd to master and do the IOMMU programming based on virtio device's DMA address space for the VFIO groups inside this VFIO container in QEMU. For the vhost backends which support vDPA, they can leverage this message to ask master to do the IOMMU programming in QEMU for the vDPA device in backend. Signed-off-by: Tiwei Bie --- docs/interop/vhost-user.txt | 21 ++++++++++++++++ hw/virtio/vhost-user.c | 45 ++++++++++++++++++++++++++++++++++ include/hw/virtio/vhost-user.h | 2 ++ 3 files changed, 68 insertions(+) diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index f59667f498..2ac250aa01 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -397,6 +397,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_CONFIG 9 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10 #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11 +#define VHOST_USER_PROTOCOL_F_VFIO_CONTAINER 12 Master message types -------------------- @@ -815,6 +816,26 @@ Slave message types This request should be sent only when VHOST_USER_PROTOCOL_F_HOST_NOTIFIER protocol feature has been successfully negotiated. + * VHOST_USER_SLAVE_VFIO_CONTAINER_MSG + + Id: 4 + Equivalent ioctl: N/A + Slave payload: N/A + Master payload: N/A + + When VHOST_USER_PROTOCOL_F_VFIO_CONTAINER is negotiated, vhost-user + slave could send this request to share its VFIO container fd via + ancillary data to master. 
Before sending a VFIO container fd to + master, slave should make sure that IOMMU type has already been + set correctly. After receiving this request from slave, master will + destroy the existing VFIO container (including clearing all the DMA + mappings and closing the container fd) if any and setup a new VFIO + container (including clearing all the existing DMA mappings in this + new container and setup DMA mappings for this container based on + virtio device's DMA address space) if the request is sent with a + file descriptor. + + VHOST_USER_PROTOCOL_F_REPLY_ACK: ------------------------------- The original vhost-user specification only demands replies for certain diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b041343632..d53787529d 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -52,6 +52,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_CONFIG = 9, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_VFIO_CONTAINER = 12, VHOST_USER_PROTOCOL_F_MAX }; @@ -97,6 +98,7 @@ typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VFIO_CONTAINER_MSG = 4, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -949,6 +951,41 @@ static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev, return 0; } +static int vhost_user_slave_handle_vfio_container(struct vhost_dev *dev, + int *fd) +{ + struct vhost_user *u = dev->opaque; + VhostUserState *user = u->user; + VirtIODevice *vdev = dev->vdev; + int container_fd = fd[0]; + VFIOContainer *container; + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_VFIO_CONTAINER) || + vdev == NULL) { + return -1; + } + + if (user->vfio_container) { + vfio_free_container(user->vfio_container); + user->vfio_container = NULL; + } + + if (container_fd < 0) { + return 0; + } + + container = 
vfio_new_container(container_fd, vdev->dma_as, NULL); + if (container == NULL) { + return -1; + } + + user->vfio_container = container; + fd[0] = -1; + + return 0; +} + static void slave_read(void *opaque) { struct vhost_dev *dev = opaque; @@ -1021,6 +1058,9 @@ static void slave_read(void *opaque) ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, fd[0]); break; + case VHOST_USER_SLAVE_VFIO_CONTAINER_MSG: + ret = vhost_user_slave_handle_vfio_container(dev, fd); + break; default: error_report("Received unexpected msg type."); ret = -EINVAL; @@ -1761,6 +1801,11 @@ void vhost_user_cleanup(VhostUserState *user) user->notifier[i].addr = NULL; } } + + if (user->vfio_container) { + vfio_free_container(user->vfio_container); + user->vfio_container = NULL; + } } const VhostOps user_ops = { diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index fd660393a0..99c6dbbbff 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -10,6 +10,7 @@ #include "chardev/char-fe.h" #include "hw/virtio/virtio.h" +#include "hw/vfio/vfio-common.h" typedef struct VhostUserHostNotifier { MemoryRegion mr; @@ -20,6 +21,7 @@ typedef struct VhostUserHostNotifier { typedef struct VhostUserState { CharBackend *chr; VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX]; + VFIOContainer *vfio_container; } VhostUserState; VhostUserState *vhost_user_init(void); From patchwork Wed Sep 19 06:28:33 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 10605375 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id CA4A4112B for ; Wed, 19 Sep 2018 06:34:15 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B99062B649 for ; Wed, 19 Sep 2018 06:34:15 +0000 
(UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id AE0C02B64E; Wed, 19 Sep 2018 06:34:15 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 554802B649 for ; Wed, 19 Sep 2018 06:34:15 +0000 (UTC) Received: from localhost ([::1]:43873 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2W3y-0001Rs-LS for patchwork-qemu-devel@patchwork.kernel.org; Wed, 19 Sep 2018 02:34:14 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45350) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2Vzo-0006eY-8D for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1g2Vzn-0002fJ-Ck for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:56 -0400 Received: from mga11.intel.com ([192.55.52.93]:1984) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1g2Vzn-0002dt-4X for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:55 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 18 Sep 2018 23:29:53 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,392,1531810800"; d="scan'208";a="71164029" Received: from btwcube1.sh.intel.com ([10.67.104.151]) by fmsmga007.fm.intel.com with ESMTP; 18 Sep 2018 23:29:44 -0700 From: Tiwei Bie To: mst@redhat.com, alex.williamson@redhat.com, jasowang@redhat.com, qemu-devel@nongnu.org Date: Wed, 19 Sep 2018 14:28:33 +0800 
Message-Id: <20180919062834.30103-4-tiwei.bie@intel.com> X-Mailer: git-send-email 2.18.0 In-Reply-To: <20180919062834.30103-1-tiwei.bie@intel.com> References: <20180919062834.30103-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 192.55.52.93 Subject: [Qemu-devel] [RFC v2 3/4] libvhost-user: support VFIO container message X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: tiwei.bie@intel.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP This patch introduces the VFIO container message support in libvhost-user. A new API is added to support setting the VFIO container for the vhost device. Signed-off-by: Tiwei Bie --- contrib/libvhost-user/libvhost-user.c | 29 ++++++++++++++++++++++++++- contrib/libvhost-user/libvhost-user.h | 12 +++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index a6b46cdc03..b960449f9e 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -979,6 +979,32 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, return vu_process_message_reply(dev, &vmsg); } +bool vu_set_vfio_container(VuDev *dev, int fd) +{ + int fd_num = 0; + VhostUserMsg vmsg = { + .request = VHOST_USER_SLAVE_VFIO_CONTAINER_MSG, + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + .size = 0, + }; + + if (fd != -1) { + vmsg.fds[fd_num++] = fd; + } + + vmsg.fd_num = fd_num; + + if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) { + return false; + } + + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { + return false; + } + + return vu_process_message_reply(dev, &vmsg); +} + static bool vu_set_vring_call_exec(VuDev 
*dev, VhostUserMsg *vmsg) { @@ -1033,7 +1059,8 @@ vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg) uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD | 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ | 1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER | - 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD; + 1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD | + 1ULL << VHOST_USER_PROTOCOL_F_VFIO_CONTAINER; if (have_userfault()) { features |= 1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT; diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 4aa55b4d2d..5232bd85ab 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -53,6 +53,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_CONFIG = 9, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_VFIO_CONTAINER = 12, VHOST_USER_PROTOCOL_F_MAX }; @@ -99,6 +100,7 @@ typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VFIO_CONTAINER_MSG = 4, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -401,6 +403,16 @@ void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, int size, int offset); +/** + * vu_set_vfio_container: + * @dev: a VuDev context + * @fd: a VFIO container file descriptor + * + * Set device's VFIO container file descriptor. If called with + * -1 @fd, the container is destroyed. 
+ */ +bool vu_set_vfio_container(VuDev *dev, int fd); + /** * vu_queue_set_notification: * @dev: a VuDev context From patchwork Wed Sep 19 06:28:34 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tiwei Bie X-Patchwork-Id: 10605373 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id ABE34112B for ; Wed, 19 Sep 2018 06:34:13 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 9B48F2B649 for ; Wed, 19 Sep 2018 06:34:13 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8EEA22B64E; Wed, 19 Sep 2018 06:34:13 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 2680A2B649 for ; Wed, 19 Sep 2018 06:34:13 +0000 (UTC) Received: from localhost ([::1]:43872 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2W3v-0001QC-VZ for patchwork-qemu-devel@patchwork.kernel.org; Wed, 19 Sep 2018 02:34:12 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45363) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1g2Vzo-0006ep-S8 for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:57 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1g2Vzn-0002fg-TN for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:56 -0400 Received: from mga11.intel.com ([192.55.52.93]:1984) by eggs.gnu.org with esmtps 
(TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1g2Vzn-0002dt-JQ for qemu-devel@nongnu.org; Wed, 19 Sep 2018 02:29:55 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 18 Sep 2018 23:29:53 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.53,392,1531810800"; d="scan'208";a="71164034" Received: from btwcube1.sh.intel.com ([10.67.104.151]) by fmsmga007.fm.intel.com with ESMTP; 18 Sep 2018 23:29:46 -0700 From: Tiwei Bie To: mst@redhat.com, alex.williamson@redhat.com, jasowang@redhat.com, qemu-devel@nongnu.org Date: Wed, 19 Sep 2018 14:28:34 +0800 Message-Id: <20180919062834.30103-5-tiwei.bie@intel.com> X-Mailer: git-send-email 2.18.0 In-Reply-To: <20180919062834.30103-1-tiwei.bie@intel.com> References: <20180919062834.30103-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 192.55.52.93 Subject: [Qemu-devel] [RFC v2 4/4] vhost-user-bridge: support VFIO container message X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: tiwei.bie@intel.com Errors-To: qemu-devel-bounces+patchwork-qemu-devel=patchwork.kernel.org@nongnu.org Sender: "Qemu-devel" X-Virus-Scanned: ClamAV using ClamSMTP This patch introduces the VFIO container message support in vhost-user-bridge. A new option (-G) is added to set the VFIO container for the vhost device. This is mainly used to test the VFIO container message implementation in vhost user. 
Signed-off-by: Tiwei Bie --- tests/vhost-user-bridge.c | 63 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 0884294141..b668462b49 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -35,6 +35,9 @@ #include "standard-headers/linux/virtio_net.h" #include "contrib/libvhost-user/libvhost-user.h" +#include +#include + #define VHOST_USER_BRIDGE_DEBUG 1 #define DPRINT(...) \ @@ -71,6 +74,8 @@ typedef struct VubrDev { void *addr; pthread_t thread; } notifier; + int vfio_container; + int vfio_group; } VubrDev; static void @@ -467,6 +472,12 @@ vubr_queue_set_started(VuDev *dev, int qidx, bool started) qidx * getpagesize()); } + /* We can test setting VFIO container multiple times + * by doing this in queue start */ + if (started && vubr->vfio_container >= 0) { + vu_set_vfio_container(dev, vubr->vfio_container); + } + if (qidx % 2 == 1) { vu_set_queue_handler(dev, vq, started ? 
vubr_handle_tx : NULL); } @@ -537,6 +548,7 @@ vubr_new(const char *path, bool client) } dev->notifier.fd = -1; + dev->vfio_container = -1; un.sun_family = AF_UNIX; strcpy(un.sun_path, path); @@ -642,6 +654,43 @@ vubr_host_notifier_setup(VubrDev *dev) dev->notifier.thread = thread; } +static void +vubr_vfio_container_setup(VubrDev *dev, const char *vfio_group) +{ + int container_fd; + int group_fd; + + container_fd = open("/dev/vfio/vfio", O_RDWR); + if (container_fd < 0) { + vubr_die("open(/dev/vfio/vfio)"); + } + + group_fd = open(vfio_group, O_RDWR); + if (group_fd < 0) { + vubr_die(vfio_group); + } + + if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd) < 0) { + vubr_die("ioctl(VFIO_GROUP_SET_CONTAINER)"); + } + + if (ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) || + ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) { + bool v2 = !!ioctl(container_fd, VFIO_CHECK_EXTENSION, + VFIO_TYPE1v2_IOMMU); + + if (ioctl(container_fd, VFIO_SET_IOMMU, v2 ? VFIO_TYPE1v2_IOMMU : + VFIO_TYPE1_IOMMU) < 0) { + vubr_die("ioctl(VFIO_SET_IOMMU)"); + } + } else { + vubr_die("No available IOMMU models"); + } + + dev->vfio_container = container_fd; + dev->vfio_group = group_fd; +} + static void vubr_set_host(struct sockaddr_in *saddr, const char *host) { @@ -757,8 +806,9 @@ main(int argc, char *argv[]) int opt; bool client = false; bool host_notifier = false; + char *vfio_group = NULL; - while ((opt = getopt(argc, argv, "l:r:u:cH")) != -1) { + while ((opt = getopt(argc, argv, "l:r:u:cHG:")) != -1) { switch (opt) { case 'l': @@ -780,6 +830,9 @@ main(int argc, char *argv[]) case 'H': host_notifier = true; break; + case 'G': + vfio_group = optarg; + break; default: goto out; } @@ -799,6 +852,10 @@ main(int argc, char *argv[]) vubr_host_notifier_setup(dev); } + if (vfio_group) { + vubr_vfio_container_setup(dev, vfio_group); + } + vubr_backend_udp_setup(dev, lhost, lport, rhost, rport); vubr_run(dev); @@ -808,7 +865,8 @@ main(int argc, char *argv[]) out: 
fprintf(stderr, "Usage: %s ", argv[0]); - fprintf(stderr, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport] [-r rhost:rport]\n"); + fprintf(stderr, "[-c] [-H] [-u ud_socket_path] [-l lhost:lport]\n"); + fprintf(stderr, "\t\t[-r rhost:rport] [-G /dev/vfio/GROUP]\n"); fprintf(stderr, "\t-u path to unix doman socket. default: %s\n", DEFAULT_UD_SOCKET); fprintf(stderr, "\t-l local host and port. default: %s:%s\n", @@ -817,6 +875,7 @@ out: DEFAULT_RHOST, DEFAULT_RPORT); fprintf(stderr, "\t-c client mode\n"); fprintf(stderr, "\t-H use host notifier\n"); + fprintf(stderr, "\t-G VFIO group path.\n"); return 1; }