@@ -17,6 +17,7 @@
#include "hw/virtio/vhost-iova-tree.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/virtio.h"
+#include "qemu/thread.h"
#include "standard-headers/linux/vhost_types.h"
/*
@@ -43,8 +44,30 @@ typedef struct vhost_vdpa_shared {
/* Copy of backend features */
uint64_t backend_cap;
+    /*
+     * Thread to map memory during QEMU incoming migration.
+     *
+     * Incoming migration calls the devices' ->load_setup in the main
+     * thread, but map operations can take a long time, preventing the main
+     * thread from serving other requests such as QMP ones.
+     *
+     * The thread fetches jobs from map_queue until it pops the
+     * VhostVDPAShared pointer itself, which signals the end of its work.
+     * From that point the thread is joined and map requests become
+     * synchronous again.  These later maps are not issued from the main
+     * thread, so there is no danger of blocking it.
+     */
+ QemuThread map_thread;
+ GAsyncQueue *map_queue;
+ bool map_thread_enabled;
+
bool iotlb_batch_begin_sent;
+ /*
+ * The memory listener has been registered, so DMA maps have been sent to
+ * the device.
+ */
+ bool listener_registered;
+
/* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
bool shadow_data;
} VhostVDPAShared;
@@ -73,6 +96,8 @@ int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
hwaddr size, void *vaddr, bool readonly);
int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
hwaddr size);
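+/*
+ * vhost_vdpa_load_setup(): start the map thread and register the memory
+ * listener so DMA maps can be sent while the incoming migration stream is
+ * still being processed.
+ * vhost_vdpa_load_cleanup(): join the map thread, or delegate the join to
+ * vhost_vdpa_dev_start() when vhost is about to start.
+ */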
+int vhost_vdpa_load_setup(VhostVDPAShared *s, AddressSpace *dma_as);
+int vhost_vdpa_load_cleanup(VhostVDPAShared *s, bool vhost_will_start);
typedef struct vdpa_iommu {
VhostVDPAShared *dev_shared;
@@ -101,6 +101,15 @@ int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
msg.iotlb.type = VHOST_IOTLB_UPDATE;
+ if (s->map_thread_enabled && !qemu_thread_is_self(&s->map_thread)) {
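+        /* Defer to the map thread; it takes ownership of new_msg */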
+ struct vhost_msg_v2 *new_msg = g_new(struct vhost_msg_v2, 1);
+
+ *new_msg = msg;
+ g_async_queue_push(s->map_queue, new_msg);
+
+ return 0;
+ }
+
trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova,
msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
msg.iotlb.type);
@@ -131,6 +140,15 @@ int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
msg.iotlb.size = size;
msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+ if (s->map_thread_enabled && !qemu_thread_is_self(&s->map_thread)) {
+ struct vhost_msg_v2 *new_msg = g_new(struct vhost_msg_v2, 1);
+
+ *new_msg = msg;
+ g_async_queue_push(s->map_queue, new_msg);
+
+ return 0;
+ }
+
trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova,
msg.iotlb.size, msg.iotlb.type);
@@ -156,6 +174,15 @@ static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s)
return;
}
+ if (s->map_thread_enabled && !qemu_thread_is_self(&s->map_thread)) {
+ struct vhost_msg_v2 *new_msg = g_new(struct vhost_msg_v2, 1);
+
+ *new_msg = msg;
+ g_async_queue_push(s->map_queue, new_msg);
+
+ return;
+ }
+
trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type);
if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
error_report("failed to write, fd=%d, errno=%d (%s)",
@@ -180,6 +207,15 @@ static void vhost_vdpa_dma_end_batch(VhostVDPAShared *s)
msg.type = VHOST_IOTLB_MSG_V2;
msg.iotlb.type = VHOST_IOTLB_BATCH_END;
+ if (s->map_thread_enabled && !qemu_thread_is_self(&s->map_thread)) {
+ struct vhost_msg_v2 *new_msg = g_new(struct vhost_msg_v2, 1);
+
+ *new_msg = msg;
+ g_async_queue_push(s->map_queue, new_msg);
+
+ return;
+ }
+
trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type);
if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
error_report("failed to write, fd=%d, errno=%d (%s)",
@@ -1288,6 +1324,94 @@ static void vhost_vdpa_suspend(struct vhost_dev *dev)
vhost_vdpa_reset_device(dev);
}
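+/*
+ * Map thread worker: consume the map and unmap jobs queued by the main
+ * thread during incoming migration.  Return NULL on success, or a GPtrArray
+ * with the vhost_msg_v2 entries whose operation failed.
+ */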
+static void *vhost_vdpa_load_map(void *opaque)
+{
+ VhostVDPAShared *shared = opaque;
+ GPtrArray *ret = NULL;
+
+ while (true) {
+ g_autofree struct vhost_msg_v2 *msg = NULL;
+ void *job = g_async_queue_pop(shared->map_queue);
+ int r = 0;
+
+ if (job == shared) {
+ /* exit signal */
+ break;
+ }
+
+ msg = job;
+ switch (msg->iotlb.type) {
+ case VHOST_IOTLB_UPDATE:
+ r = vhost_vdpa_dma_map(shared, msg->asid, msg->iotlb.iova,
+ msg->iotlb.size,
+ (void *)(uintptr_t)msg->iotlb.uaddr,
+ msg->iotlb.perm == VHOST_ACCESS_RO);
+ break;
+ case VHOST_IOTLB_INVALIDATE:
+ r = vhost_vdpa_dma_unmap(shared, msg->asid, msg->iotlb.iova,
+ msg->iotlb.size);
+ break;
+ case VHOST_IOTLB_BATCH_BEGIN:
+ vhost_vdpa_iotlb_batch_begin_once(shared);
+ break;
+ case VHOST_IOTLB_BATCH_END:
+ vhost_vdpa_dma_end_batch(shared);
+ break;
+ default:
+ error_report("Invalid IOTLB msg type %d", msg->iotlb.type);
+ break;
+        }
+
+ if (unlikely(r != 0)) {
+ /* Add to return value so we can remove it from iova_tree */
+ if (ret == NULL) {
+ ret = g_ptr_array_new_full(0, g_free);
+ }
+
+ g_ptr_array_add(ret, g_steal_pointer(&msg));
+ }
+ }
+
+ return ret;
+}
+
+static void vhost_vdpa_spawn_maps_thread(VhostVDPAShared *shared)
+{
+ shared->map_queue = g_async_queue_new();
+ qemu_thread_create(&shared->map_thread, "vdpa map thread",
+ vhost_vdpa_load_map, shared, QEMU_THREAD_JOINABLE);
+ shared->map_thread_enabled = true;
+}
+
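+/*
+ * Stop the map thread and make map requests synchronous again.  Return
+ * false if any queued operation failed; the failed regions are removed from
+ * the IOVA tree so the caller can abort the device start.
+ */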
+static bool vhost_vdpa_join_maps_thread(VhostVDPAShared *shared)
+{
+ g_autoptr(GPtrArray) failed_iova = NULL;
+
+ /* Signal end of offloading maps */
+ g_async_queue_push(shared->map_queue, shared);
+ failed_iova = qemu_thread_join(&shared->map_thread);
+ g_async_queue_unref(shared->map_queue);
+
+ shared->map_thread_enabled = false;
+
+ if (likely(!failed_iova)) {
+ return true;
+ }
+
+    /* Remove the failed maps from the IOVA tree so the start is aborted */
+    for (size_t i = 0; i < failed_iova->len; ++i) {
+ struct vhost_msg_v2 *msg = g_ptr_array_index(failed_iova, i);
+ DMAMap mem_region = {
+ .iova = msg->iotlb.iova,
+ .size = msg->iotlb.size - 1, /* Inclusive */
+ };
+
+ vhost_iova_tree_remove(shared->iova_tree, mem_region);
+ }
+
+ return false;
+}
+
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
struct vhost_vdpa *v = dev->opaque;
@@ -1315,7 +1439,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
"IOMMU and try again");
return -1;
}
- memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ if (!v->shared->listener_registered) {
+ memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ v->shared->listener_registered = true;
+ } else {
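+        /*
+         * The listener was already registered at load_setup time, so the
+         * initial maps were queued to the map thread; wait for them to
+         * complete before declaring DRIVER_OK.
+         */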
+ ok = vhost_vdpa_join_maps_thread(v->shared);
+ if (unlikely(!ok)) {
+ goto out_stop;
+ }
+ }
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
@@ -1340,6 +1472,8 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev)
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
memory_listener_unregister(&v->shared->listener);
+    v->shared->listener_registered = false;
}
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1522,3 +1656,34 @@ const VhostOps vdpa_ops = {
.vhost_set_config_call = vhost_vdpa_set_config_call,
.vhost_reset_status = vhost_vdpa_reset_status,
};
+
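+/*
+ * Called from the incoming migration ->load_setup path: set ACKNOWLEDGE and
+ * DRIVER in the device status, start the map thread and register the memory
+ * listener so maps can begin as early as possible.
+ */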
+int vhost_vdpa_load_setup(VhostVDPAShared *shared, AddressSpace *dma_as)
+{
+ uint8_t s = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
+ int r = ioctl(shared->device_fd, VHOST_VDPA_SET_STATUS, &s);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ vhost_vdpa_spawn_maps_thread(shared);
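+    /*
+     * Registering the listener from here queues the initial maps to the map
+     * thread instead of blocking the main thread on them.
+     */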
+ memory_listener_register(&shared->listener, dma_as);
+ shared->listener_registered = true;
+ return 0;
+}
+
+int vhost_vdpa_load_cleanup(VhostVDPAShared *shared, bool vhost_will_start)
+{
+ if (!shared->map_thread_enabled) {
+ return 0;
+ }
+
+ if (vhost_will_start) {
+ /*
+         * Delegate joining the map thread to vhost_vdpa_dev_start, as it
+         * runs outside of the main QEMU lock.
+ */
+ return 0;
+ }
+
+ return vhost_vdpa_join_maps_thread(shared) ? 0 : -1;
+}