@@ -167,6 +167,35 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx)
return idx - dev->vq_index;
}
+static int vhost_kernel_set_vring_iotlb_request(struct vhost_dev *dev,
+ struct
+ vhost_vring_iotlb_entry
+ *entry)
+{
+ int r = vhost_kernel_call(dev, VHOST_SET_VRING_IOTLB_REQUEST, entry);
+ return r;
+}
+
+static int vhost_kernel_update_iotlb(struct vhost_dev *dev,
+ struct vhost_iotlb_entry *entry)
+{
+ int r = vhost_kernel_call(dev, VHOST_UPDATE_IOTLB, entry);
+ return r;
+}
+
+static int vhost_kernel_run_iotlb(struct vhost_dev *dev,
+ int *enabled)
+{
+ int r = vhost_kernel_call(dev, VHOST_RUN_IOTLB, enabled);
+ return r;
+}
+
+static int vhost_kernel_set_vring_iotlb_call(struct vhost_dev *dev,
+ struct vhost_vring_file *file)
+{
+ return vhost_kernel_call(dev, VHOST_SET_VRING_IOTLB_CALL, file);
+}
+
static const VhostOps kernel_ops = {
.backend_type = VHOST_BACKEND_TYPE_KERNEL,
.vhost_backend_init = vhost_kernel_init,
@@ -190,6 +219,10 @@ static const VhostOps kernel_ops = {
.vhost_set_owner = vhost_kernel_set_owner,
.vhost_reset_device = vhost_kernel_reset_device,
.vhost_get_vq_index = vhost_kernel_get_vq_index,
+ .vhost_set_vring_iotlb_request = vhost_kernel_set_vring_iotlb_request,
+ .vhost_update_iotlb = vhost_kernel_update_iotlb,
+ .vhost_set_vring_iotlb_call = vhost_kernel_set_vring_iotlb_call,
+ .vhost_run_iotlb = vhost_kernel_run_iotlb,
};
int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type)
@@ -22,6 +22,7 @@
#include "qemu/memfd.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
#include "hw/virtio/virtio-bus.h"
#include "hw/virtio/virtio-access.h"
#include "migration/migration.h"
@@ -407,6 +408,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
uint64_t start_addr,
uint64_t size)
{
+ #if 0
int i;
int r = 0;
@@ -419,7 +421,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
continue;
}
l = vq->ring_size;
- p = cpu_physical_memory_map(vq->ring_phys, &l, 1);
+ p = virtio_memory_map(dev->vdev, vq->ring_phys, &l, 1);
if (!p || l != vq->ring_size) {
fprintf(stderr, "Unable to map ring buffer for ring %d\n", i);
r = -ENOMEM;
@@ -428,9 +430,11 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,
fprintf(stderr, "Ring buffer relocated for ring %d\n", i);
r = -EBUSY;
}
- cpu_physical_memory_unmap(p, l, 0, 0);
+ virtio_memory_unmap(dev->vdev, p, l, 0, 0);
}
return r;
+ #endif
+ return 0;
}
static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
@@ -662,6 +666,22 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log)
return r < 0 ? -errno : 0;
}
+static int vhost_dev_update_iotlb(struct vhost_dev *dev,
+ struct vhost_iotlb_entry *entry)
+{
+ int r;
+ r = dev->vhost_ops->vhost_update_iotlb(dev, entry);
+ return r < 0 ? -errno : 0;
+}
+
+static int vhost_run_iotlb(struct vhost_dev *dev,
+ int *enabled)
+{
+ int r;
+ r = dev->vhost_ops->vhost_run_iotlb(dev, enabled);
+ return r < 0 ? -errno : 0;
+}
+
static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
{
int r, t, i, idx;
@@ -798,6 +818,73 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
return -errno;
}
+static int vhost_memory_region_lookup(struct vhost_dev *hdev,
+ __u64 gpa, __u64 *uaddr, __u64 *len)
+{
+ int i;
+
+ for (i = 0; i < hdev->mem->nregions; i++) {
+ struct vhost_memory_region *reg = hdev->mem->regions + i;
+
+ if (gpa >= reg->guest_phys_addr &&
+ reg->guest_phys_addr + reg->memory_size > gpa) {
+ *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
+ *len = reg->guest_phys_addr + reg->memory_size - gpa;
+ return 0;
+ }
+ }
+
+ return -EFAULT;
+}
+
+static void vhost_device_iotlb_request(void *opaque)
+{
+ IOMMUTLBEntry iotlb;
+ struct vhost_virtqueue *vq = opaque;
+ struct vhost_dev *hdev = vq->dev;
+ struct vhost_iotlb_entry *request = vq->iotlb_req;
+ struct vhost_iotlb_entry reply = *request;
+
+ rcu_read_lock();
+
+ event_notifier_test_and_clear(&vq->iotlb_notifier);
+
+ reply.flags.type = VHOST_IOTLB_UPDATE;
+ reply.flags.valid = VHOST_IOTLB_INVALID;
+
+ if (request->flags.type != VHOST_IOTLB_MISS) {
+ goto done;
+ }
+
+ iotlb = address_space_get_iotlb_entry(virtio_get_dma_as(hdev->vdev),
+ request->iova,
+ false);
+ if (iotlb.target_as != NULL) {
+ if (vhost_memory_region_lookup(hdev, iotlb.translated_addr,
+ &reply.userspace_addr,
+ &reply.size)) {
+ goto done;
+ }
+ reply.iova = reply.iova & ~iotlb.addr_mask;
+ reply.size = MIN(iotlb.addr_mask + 1, reply.size);
+ if (iotlb.perm == IOMMU_RO) {
+ reply.flags.perm = VHOST_ACCESS_RO;
+ } else if (iotlb.perm == IOMMU_WO) {
+ reply.flags.perm = VHOST_ACCESS_WO;
+ } else if (iotlb.perm == IOMMU_RW) {
+ reply.flags.perm = VHOST_ACCESS_RW;
+ } else {
+ fprintf(stderr, "unknown iotlb perm!\n");
+ }
+ reply.flags.type = VHOST_IOTLB_UPDATE;
+ reply.flags.valid = VHOST_IOTLB_VALID;
+ }
+
+done:
+ vhost_dev_update_iotlb(hdev, &reply);
+ rcu_read_unlock();
+}
+
static int vhost_virtqueue_start(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
@@ -838,21 +925,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
s = l = virtio_queue_get_desc_size(vdev, idx);
a = virtio_queue_get_desc_addr(vdev, idx);
- vq->desc = cpu_physical_memory_map(a, &l, 0);
+ vq->desc = virtio_memory_map(vdev, a, &l, 0);
if (!vq->desc || l != s) {
r = -ENOMEM;
goto fail_alloc_desc;
}
s = l = virtio_queue_get_avail_size(vdev, idx);
a = virtio_queue_get_avail_addr(vdev, idx);
- vq->avail = cpu_physical_memory_map(a, &l, 0);
+ vq->avail = virtio_memory_map(vdev, a, &l, 0);
if (!vq->avail || l != s) {
r = -ENOMEM;
goto fail_alloc_avail;
}
vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
- vq->used = cpu_physical_memory_map(a, &l, 1);
+ vq->used = virtio_memory_map(vdev, a, &l, 1);
if (!vq->used || l != s) {
r = -ENOMEM;
goto fail_alloc_used;
@@ -860,7 +947,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx);
vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx);
- vq->ring = cpu_physical_memory_map(a, &l, 1);
+ vq->ring = virtio_memory_map(vdev, a, &l, 1);
if (!vq->ring || l != s) {
r = -ENOMEM;
goto fail_alloc_ring;
@@ -891,20 +978,19 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
}
return 0;
-
fail_kick:
fail_alloc:
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, 0);
+ virtio_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, 0);
fail_alloc_ring:
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 0, 0);
+ virtio_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx),
+ 0, 0);
fail_alloc_used:
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, 0);
+ virtio_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, 0);
fail_alloc_avail:
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, 0);
+ virtio_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, 0);
fail_alloc_desc:
return r;
}
@@ -941,14 +1027,14 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
}
assert (r >= 0);
- cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
- 0, virtio_queue_get_ring_size(vdev, idx));
- cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx),
- 1, virtio_queue_get_used_size(vdev, idx));
- cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx),
- 0, virtio_queue_get_avail_size(vdev, idx));
- cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx),
- 0, virtio_queue_get_desc_size(vdev, idx));
+ virtio_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx),
+ 0, virtio_queue_get_ring_size(vdev, idx));
+ virtio_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx),
+ 1, virtio_queue_get_used_size(vdev, idx));
+ virtio_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+ 0, virtio_queue_get_avail_size(vdev, idx));
+ virtio_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
+ 0, virtio_queue_get_desc_size(vdev, idx));
}
static void vhost_eventfd_add(MemoryListener *listener,
@@ -970,6 +1056,9 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
struct vhost_vring_file file = {
.index = vhost_vq_index,
};
+ struct vhost_vring_iotlb_entry request = {
+ .index = vhost_vq_index,
+ };
int r = event_notifier_init(&vq->masked_notifier, 0);
if (r < 0) {
return r;
@@ -981,7 +1070,37 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
r = -errno;
goto fail_call;
}
+
+ r = event_notifier_init(&vq->iotlb_notifier, 0);
+ if (r < 0) {
+ r = -errno;
+ goto fail_call;
+ }
+
+ file.fd = event_notifier_get_fd(&vq->iotlb_notifier);
+ r = dev->vhost_ops->vhost_set_vring_iotlb_call(dev, &file);
+ if (r) {
+ r = -errno;
+ goto fail_iotlb;
+ }
+ qemu_set_fd_handler(event_notifier_get_fd(&vq->iotlb_notifier),
+ vhost_device_iotlb_request, NULL, vq);
+
+ vq->iotlb_req = g_malloc0(sizeof(*vq->iotlb_req));
+ request.userspace_addr = (uint64_t)(unsigned long)vq->iotlb_req;
+ r = dev->vhost_ops->vhost_set_vring_iotlb_request(dev, &request);
+ if (r) {
+ r = -errno;
+ goto fail_req;
+ }
+
+ vq->dev = dev;
+
return 0;
+fail_req:
+ qemu_set_fd_handler(file.fd, NULL, NULL, NULL);
+fail_iotlb:
+ event_notifier_cleanup(&vq->iotlb_notifier);
fail_call:
event_notifier_cleanup(&vq->masked_notifier);
return r;
@@ -989,7 +1108,24 @@ fail_call:
static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
{
+ qemu_set_fd_handler(event_notifier_get_fd(&vq->iotlb_notifier),
+ NULL, NULL, NULL);
event_notifier_cleanup(&vq->masked_notifier);
+ event_notifier_cleanup(&vq->iotlb_notifier);
+ g_free(vq->iotlb_req);
+}
+
+static void vhost_iommu_unmap_notify(Notifier *n, void *data)
+{
+ struct vhost_dev *hdev = container_of(n, struct vhost_dev, n);
+ IOMMUTLBEntry *iotlb = data;
+ struct vhost_iotlb_entry inv = {
+ .flags.type = VHOST_IOTLB_INVALIDATE,
+ .iova = iotlb->iova,
+ .size = iotlb->addr_mask + 1,
+ };
+
+ vhost_dev_update_iotlb(hdev, &inv);
}
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -998,6 +1134,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
uint64_t features;
int i, r;
+ hdev->vdev = NULL;
hdev->migration_blocker = NULL;
if (vhost_set_backend_type(hdev, backend_type) < 0) {
@@ -1052,6 +1189,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
.priority = 10
};
+ hdev->n.notify = vhost_iommu_unmap_notify;
+
if (hdev->migration_blocker == NULL) {
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
error_setg(&hdev->migration_blocker,
@@ -1231,6 +1370,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
if (r < 0) {
goto fail_features;
}
+
+ memory_region_register_iommu_notifier(virtio_get_dma_as(vdev)->root,
+ &hdev->n);
+
r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
if (r < 0) {
r = -errno;
@@ -1262,7 +1405,18 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
+ /* FIXME: conditionally */
+ r = vhost_run_iotlb(hdev, NULL);
+ if (r < 0) {
+ goto fail_iotlb;
+ }
+
+ hdev->vdev = vdev;
return 0;
+fail_iotlb:
+ if (hdev->vhost_ops->vhost_set_vring_enable) {
+ hdev->vhost_ops->vhost_set_vring_enable(hdev, 0);
+ }
fail_log:
vhost_log_put(hdev, false);
fail_vq:
@@ -1273,6 +1427,7 @@ fail_vq:
hdev->vq_index + i);
}
i = hdev->nvqs;
+
fail_mem:
fail_features:
@@ -1292,9 +1447,11 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
hdev->vq_index + i);
}
+ memory_region_unregister_iommu_notifier(&hdev->n);
vhost_log_put(hdev, true);
hdev->started = false;
hdev->log = NULL;
hdev->log_size = 0;
+ hdev->vdev = NULL;
}
@@ -22,9 +22,11 @@ typedef enum VhostBackendType {
struct vhost_dev;
struct vhost_log;
struct vhost_memory;
+struct vhost_iotlb_entry;
struct vhost_vring_file;
struct vhost_vring_state;
struct vhost_vring_addr;
+struct vhost_vring_iotlb_entry;
struct vhost_scsi_target;
typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque);
@@ -72,6 +74,14 @@ typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
uint64_t start1, uint64_t size1,
uint64_t start2, uint64_t size2);
+typedef int (*vhost_set_vring_iotlb_request_op)(struct vhost_dev *dev,
+ struct vhost_vring_iotlb_entry *entry);
+typedef int (*vhost_update_iotlb_op)(struct vhost_dev *dev,
+ struct vhost_iotlb_entry *entry);
+typedef int (*vhost_set_vring_iotlb_call_op)(struct vhost_dev *dev,
+ struct vhost_vring_file *file);
+typedef int (*vhost_run_iotlb_op)(struct vhost_dev *dev,
+ int *enalbed);
typedef struct VhostOps {
VhostBackendType backend_type;
@@ -100,6 +110,10 @@ typedef struct VhostOps {
vhost_requires_shm_log_op vhost_requires_shm_log;
vhost_migration_done_op vhost_migration_done;
vhost_backend_can_merge_op vhost_backend_can_merge;
+ vhost_set_vring_iotlb_request_op vhost_set_vring_iotlb_request;
+ vhost_update_iotlb_op vhost_update_iotlb;
+ vhost_set_vring_iotlb_call_op vhost_set_vring_iotlb_call;
+ vhost_run_iotlb_op vhost_run_iotlb;
} VhostOps;
extern const VhostOps user_ops;
@@ -20,6 +20,9 @@ struct vhost_virtqueue {
unsigned long long ring_phys;
unsigned ring_size;
EventNotifier masked_notifier;
+ EventNotifier iotlb_notifier;
+ struct vhost_iotlb_entry *iotlb_req;
+ struct vhost_dev *dev;
};
typedef unsigned long vhost_log_chunk_t;
@@ -36,7 +39,9 @@ struct vhost_log {
};
struct vhost_memory;
+struct vhost_iotlb_entry;
struct vhost_dev {
+ VirtIODevice *vdev;
MemoryListener memory_listener;
struct vhost_memory *mem;
int n_mem_sections;
@@ -61,6 +66,7 @@ struct vhost_dev {
void *opaque;
struct vhost_log *log;
QLIST_ENTRY(vhost_dev) entry;
+ Notifier n;
};
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -16,6 +16,7 @@
#define _QEMU_VIRTIO_ACCESS_H
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"
+#include "sysemu/dma.h"
#include "exec/address-spaces.h"
static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
@@ -193,4 +194,25 @@ static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s)
{
*s = virtio_tswap64(vdev, *s);
}
+
+static inline void *virtio_memory_map(VirtIODevice *vdev, hwaddr addr,
+ hwaddr *plen, int is_write)
+{
+ AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+ return dma_memory_map(dma_as, addr, plen, is_write ?
+ DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE);
+}
+
+static inline void virtio_memory_unmap(VirtIODevice *vdev, void *buffer,
+ hwaddr len, int is_write,
+ hwaddr access_len)
+{
+ AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+ dma_memory_unmap(dma_as, buffer, len, is_write ?
+ DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE,
+ access_len);
+}
+
#endif /* _QEMU_VIRTIO_ACCESS_H */
@@ -27,6 +27,32 @@ struct vhost_vring_file {
};
+struct vhost_iotlb_entry {
+ __u64 iova;
+ __u64 size;
+ __u64 userspace_addr;
+ struct {
+#define VHOST_ACCESS_RO 0x1
+#define VHOST_ACCESS_WO 0x2
+#define VHOST_ACCESS_RW 0x3
+ __u8 perm;
+#define VHOST_IOTLB_MISS 1
+#define VHOST_IOTLB_UPDATE 2
+#define VHOST_IOTLB_INVALIDATE 3
+ __u8 type;
+#define VHOST_IOTLB_INVALID 0x1
+#define VHOST_IOTLB_VALID 0x2
+ __u8 valid;
+ __u8 u8_padding;
+ __u32 padding;
+ } flags;
+};
+
+struct vhost_vring_iotlb_entry {
+ unsigned int index;
+ __u64 userspace_addr;
+};
+
struct vhost_vring_addr {
unsigned int index;
/* Option flags. */
@@ -127,6 +153,15 @@ struct vhost_memory {
/* Set eventfd to signal an error */
#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+/* IOTLB */
+/* Specify an eventfd file descriptor to signal on IOTLB miss */
+#define VHOST_SET_VRING_IOTLB_CALL _IOW(VHOST_VIRTIO, 0x23, struct \
+ vhost_vring_file)
+#define VHOST_SET_VRING_IOTLB_REQUEST _IOW(VHOST_VIRTIO, 0x25, struct \
+ vhost_vring_iotlb_entry)
+#define VHOST_UPDATE_IOTLB _IOW(VHOST_VIRTIO, 0x24, struct vhost_iotlb_entry)
+#define VHOST_RUN_IOTLB _IOW(VHOST_VIRTIO, 0x26, int)
+
/* VHOST_NET specific defines */
/* Attach virtio net ring to a raw socket, or tap device.
This patch implements Device IOTLB support for vhost kernel. This is done through: 1) switching to the DMA helpers when mapping/unmapping vrings in the vhost code 2) kernel support for the Device IOTLB API: - allow vhost-net to query the IOMMU IOTLB entry through eventfd - enable the ability for qemu to update a specified mapping of vhost through ioctl - enable the ability to invalidate a specified range of iova for the device IOTLB of vhost through ioctl. In the x86/intel_iommu case this is triggered through the iommu memory region notifier from the device IOTLB invalidation descriptor processing routine. With all of the above, kernel vhost_net can co-operate with the IOMMU. Cc: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/virtio/vhost-backend.c | 33 +++++++ hw/virtio/vhost.c | 203 +++++++++++++++++++++++++++++++++----- include/hw/virtio/vhost-backend.h | 14 +++ include/hw/virtio/vhost.h | 6 ++ include/hw/virtio/virtio-access.h | 22 +++++ linux-headers/linux/vhost.h | 35 +++++++ 6 files changed, 290 insertions(+), 23 deletions(-)