@@ -94,6 +94,31 @@ typedef struct {
/*
+ * VFIO_USER_DMA_MAP
+ * imported from struct vfio_iommu_type1_dma_map
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t offset; /* FD offset */
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAMap;
+
+/*
+ * VFIO_USER_DMA_UNMAP
+ * imported from struct vfio_iommu_type1_dma_unmap
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAUnmap;
+
+/*
* VFIO_USER_DEVICE_GET_INFO
* imported from struct vfio_device_info
*/
typedef struct {
VFIOUserHdr hdr;
@@ -157,4 +182,11 @@ typedef struct {
char data[];
} VFIOUserRegionRW;

+/* imported from struct vfio_bitmap */
+typedef struct {
+ uint64_t pgsize;
+ uint64_t size;
+ char data[];
+} VFIOUserBitmap;
+
#endif /* VFIO_USER_PROTOCOL_H */
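For orientation, here is a minimal sketch of filling in the VFIO_USER_DMA_MAP message defined above for an fd-backed region. The flag constants come from the kernel's struct vfio_iommu_type1_dma_map, which the protocol reuses; fill_header() is a hypothetical stand-in for the proxy's own header setup (vfio_user_request_msg() in hw/vfio/user.c).

static void build_dma_map(VFIOUserDMAMap *msgp, uint64_t fd_offset,
                          uint64_t iova, uint64_t size, bool readonly)
{
    /* fill_header() is hypothetical; the real code uses vfio_user_request_msg() */
    fill_header(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp));
    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
    if (!readonly) {
        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
    }
    msgp->offset = fd_offset; /* an offset into the shared fd, not a vaddr */
    msgp->iova = iova;
    msgp->size = size;
}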
@@ -90,6 +90,7 @@ typedef struct VFIOContainer {
VFIOContIO *io_ops;
bool initialized;
bool dirty_pages_supported;
+ bool async_ops;
uint64_t dirty_pgsizes;
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
@@ -199,7 +200,7 @@ struct VFIODevIO {
((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post)))
struct VFIOContIO {
- int (*dma_map)(VFIOContainer *container,
+ int (*dma_map)(VFIOContainer *container, MemoryRegion *mr,
struct vfio_iommu_type1_dma_map *map);
int (*dma_unmap)(VFIOContainer *container,
struct vfio_iommu_type1_dma_unmap *unmap,
@@ -207,14 +208,16 @@ struct VFIOContIO {
int (*dirty_bitmap)(VFIOContainer *container,
struct vfio_iommu_type1_dirty_bitmap *bitmap,
struct vfio_iommu_type1_dirty_bitmap_get *range);
+ void (*wait_commit)(VFIOContainer *container);
};
-#define CONT_DMA_MAP(cont, map) \
- ((cont)->io_ops->dma_map((cont), (map)))
+#define CONT_DMA_MAP(cont, mr, map) \
+ ((cont)->io_ops->dma_map((cont), (mr), (map)))
#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
+#define CONT_WAIT_COMMIT(cont) ((cont)->io_ops->wait_commit(cont))
extern VFIODevIO vfio_dev_io_ioctl;
extern VFIOContIO vfio_cont_io_ioctl;
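The ops table is what keeps call sites backend-agnostic: CONT_DMA_MAP() expands to an indirect call, so the same listener code drives either the kernel ioctl path or a vfio-user socket. A minimal sketch of the selection, assuming vfio_cont_io_sock (defined in hw/vfio/user.c below) has a matching extern declaration here:

static void vfio_container_set_io(VFIOContainer *container, bool user)
{
    /* pick the backend once; all later calls go through the macros */
    container->io_ops = user ? &vfio_cont_io_sock : &vfio_cont_io_ioctl;
}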
@@ -480,7 +480,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
return CONT_DMA_UNMAP(container, &unmap, NULL);
}
-static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+static int vfio_dma_map(VFIOContainer *container, MemoryRegion *mr, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
struct vfio_iommu_type1_dma_map map = {
@@ -496,7 +496,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
- ret = CONT_DMA_MAP(container, &map);
+ ret = CONT_DMA_MAP(container, mr, &map);
if (ret < 0) {
error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
@@ -559,7 +559,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
/* Called with rcu_read_lock held. */
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only)
+ ram_addr_t *ram_addr, bool *read_only,
+ MemoryRegion **mrp)
{
MemoryRegion *mr;
hwaddr xlat;
@@ -640,6 +641,10 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*read_only = !writable || mr->readonly;
}
+ if (mrp != NULL) {
+ *mrp = mr;
+ }
+
return true;
}
@@ -647,6 +652,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainer *container = giommu->container;
+ MemoryRegion *mr;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
void *vaddr;
int ret;
@@ -665,7 +671,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &mr)) {
goto out;
}
/*
@@ -675,14 +681,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
* of vaddr will always be there, even if the memory object is
* destroyed and its backing memory munmap-ed.
*/
- ret = vfio_dma_map(container, iova,
+ ret = vfio_dma_map(container, mr, iova,
iotlb->addr_mask + 1, vaddr,
read_only);
if (ret) {
error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
+ "0x%"HWADDR_PRIx", %p)",
container, iova,
- iotlb->addr_mask + 1, vaddr, ret);
+ iotlb->addr_mask + 1, vaddr);
}
} else {
ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
@@ -737,7 +743,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
section->offset_within_address_space;
vaddr = memory_region_get_ram_ptr(section->mr) + start;
- ret = vfio_dma_map(vrdl->container, iova, next - start,
+ ret = vfio_dma_map(vrdl->container, section->mr, iova, next - start,
vaddr, section->readonly);
if (ret) {
/* Rollback */
@@ -845,6 +851,29 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
g_free(vrdl);
}
+static void vfio_listener_begin(MemoryListener *listener)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+ /*
+ * When DMA space is the physical address space,
+ * the region add/del listeners will fire during
+ * memory update transactions. These depend on the BQL
+ * being held, so do any resulting map/unmap ops async
+ * while keeping the BQL.
+ */
+ container->async_ops = true;
+}
+
+static void vfio_listener_commit(MemoryListener *listener)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+ /* wait here for any async requests sent during the transaction */
+ CONT_WAIT_COMMIT(container);
+ container->async_ops = false;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
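To see the begin/commit pair in action, consider a guest-driven update that maps two regions. A minimal sketch, assuming plain fd-backed RAM regions: all listener callbacks fire inside memory_region_transaction_commit(), so both resulting DMA maps go out as nowait messages and vfio_listener_commit() reaps the replies together rather than blocking once per message.

static void remap_two_regions(MemoryRegion *sysmem,
                              MemoryRegion *a, hwaddr a_base,
                              MemoryRegion *b, hwaddr b_base)
{
    memory_region_transaction_begin();
    memory_region_add_subregion(sysmem, a_base, a);
    memory_region_add_subregion(sysmem, b_base, b);
    /* listener fires here: begin(), region_add() x2, then commit() */
    memory_region_transaction_commit();
}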
@@ -1044,12 +1073,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
}
- ret = vfio_dma_map(container, iova, int128_get64(llsize),
+ ret = vfio_dma_map(container, section->mr, iova, int128_get64(llsize),
vaddr, section->readonly);
if (ret) {
error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iova, int128_get64(llsize), vaddr, ret);
+ "0x%"HWADDR_PRIx", %p)",
+ container, iova, int128_get64(llsize), vaddr);
if (memory_region_is_ram_device(section->mr)) {
/* Allow unexpected mappings not to be fatal for RAM devices */
error_report_err(err);
@@ -1310,7 +1339,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
+ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, NULL)) {
int ret;
ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
@@ -1428,6 +1457,8 @@ static void vfio_listener_log_sync(MemoryListener *listener,
static const MemoryListener vfio_memory_listener = {
.name = "vfio",
+ .begin = vfio_listener_begin,
+ .commit = vfio_listener_commit,
.region_add = vfio_listener_region_add,
.region_del = vfio_listener_region_del,
.log_global_start = vfio_listener_log_global_start,
@@ -2819,7 +2850,7 @@ VFIODevIO vfio_dev_io_ioctl = {
.region_write = vfio_io_region_write,
};
-static int vfio_io_dma_map(VFIOContainer *container,
+static int vfio_io_dma_map(VFIOContainer *container, MemoryRegion *mr,
struct vfio_iommu_type1_dma_map *map)
{
@@ -2879,8 +2910,14 @@ static int vfio_io_dirty_bitmap(VFIOContainer *container,
return ret < 0 ? -errno : ret;
}
+static void vfio_io_wait_commit(VFIOContainer *container)
+{
+ /* ioctl()s are synchronous */
+}
+
VFIOContIO vfio_cont_io_ioctl = {
.dma_map = vfio_io_dma_map,
.dma_unmap = vfio_io_dma_unmap,
.dirty_bitmap = vfio_io_dirty_bitmap,
+ .wait_commit = vfio_io_wait_commit,
};
@@ -52,8 +52,11 @@ static void vfio_user_request(void *opaque);
static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg);
static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds);
+static void vfio_user_send_nowait(VFIOProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize);
static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds, int rsize, bool nobql);
+static void vfio_user_wait_reqs(VFIOProxy *proxy);
static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
uint32_t size, uint32_t flags);
@@ -563,6 +566,36 @@ static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
}
}
+/*
+ * nowait send - vfio_user_wait_reqs() can wait for it later
+ */
+static void vfio_user_send_nowait(VFIOProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize)
+{
+ VFIOUserMsg *msg;
+ int ret;
+
+ if (hdr->flags & VFIO_USER_NO_REPLY) {
+ error_printf("vfio_user_send_nowait on async message\n");
+ return;
+ }
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = rsize ? rsize : hdr->size;
+ msg->type = VFIO_MSG_NOWAIT;
+
+ ret = vfio_user_send_queued(proxy, msg);
+ if (ret < 0) {
+ vfio_user_recycle(proxy, msg);
+ return;
+ }
+
+ proxy->last_nowait = msg;
+}
+
static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds, int rsize, bool nobql)
{
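The intended pairing of the nowait send with the wait that follows, as a minimal sketch (header setup elided; vfio_user_wait_reqs() is added in the next hunk). Only the most recent nowait message is tracked in last_nowait, which assumes the server replies in order, so waiting on the last one implies the earlier ones completed.

static void nowait_roundtrip(VFIOProxy *proxy, VFIOUserHdr *hdr)
{
    vfio_user_send_nowait(proxy, hdr, NULL, 0);  /* becomes last_nowait */
    /* ... queue more requests, finish the memory transaction ... */
    vfio_user_wait_reqs(proxy);  /* waits on last_nowait, reports errors */
}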
@@ -612,6 +645,57 @@ static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
}
}
+static void vfio_user_wait_reqs(VFIOProxy *proxy)
+{
+ VFIOUserMsg *msg;
+ bool iolock = false;
+
+ /*
+ * Any DMA map/unmap requests sent in the middle
+ * of a memory region transaction were sent nowait.
+ * Wait for them here.
+ */
+ qemu_mutex_lock(&proxy->lock);
+ if (proxy->last_nowait != NULL) {
+ iolock = qemu_mutex_iothread_locked();
+ if (iolock) {
+ qemu_mutex_unlock_iothread();
+ }
+
+ /*
+ * Change type to WAIT to wait for reply
+ */
+ msg = proxy->last_nowait;
+ msg->type = VFIO_MSG_WAIT;
+ while (!msg->complete) {
+ if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
+ QTAILQ_REMOVE(&proxy->pending, msg, next);
+ error_printf("vfio_wait_reqs - timed out\n");
+ break;
+ }
+ }
+
+ if (msg->hdr->flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_wait_reqs - error reply on async request ");
+ error_printf("command %x error %s\n", msg->hdr->command,
+ strerror(msg->hdr->error_reply));
+ }
+
+ proxy->last_nowait = NULL;
+ /*
+ * Change type back to NOWAIT to free
+ */
+ msg->type = VFIO_MSG_NOWAIT;
+ vfio_user_recycle(proxy, msg);
+ }
+
+ /* lock order is BQL->proxy - don't hold proxy when getting BQL */
+ qemu_mutex_unlock(&proxy->lock);
+ if (iolock) {
+ qemu_mutex_lock_iothread();
+ }
+}
+
static QLIST_HEAD(, VFIOProxy) vfio_user_sockets =
QLIST_HEAD_INITIALIZER(vfio_user_sockets);
@@ -937,6 +1021,103 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp)
return 0;
}
+static int vfio_user_dma_map(VFIOProxy *proxy,
+ struct vfio_iommu_type1_dma_map *map,
+ int fd, bool will_commit)
+{
+ VFIOUserFDs *fds = NULL;
+ VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));
+ int ret;
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
+ msgp->argsz = map->argsz;
+ msgp->flags = map->flags;
+ msgp->offset = map->vaddr;
+ msgp->iova = map->iova;
+ msgp->size = map->size;
+
+ /*
+ * The will_commit case sends without blocking or dropping the BQL.
+ * The replies are collected later in vfio_user_wait_reqs().
+ */
+ if (will_commit) {
+ /* can't use auto variable since we don't block */
+ if (fd != -1) {
+ fds = vfio_user_getfds(1);
+ fds->send_fds = 1;
+ fds->fds[0] = fd;
+ }
+ vfio_user_send_nowait(proxy, &msgp->hdr, fds, 0);
+ ret = 0;
+ } else {
+ VFIOUserFDs local_fds = { 1, 0, &fd };
+
+ fds = fd != -1 ? &local_fds : NULL;
+ vfio_user_send_wait(proxy, &msgp->hdr, fds, 0, will_commit);
+ ret = (msgp->hdr.flags & VFIO_USER_ERROR) ? -msgp->hdr.error_reply : 0;
+ g_free(msgp);
+ }
+
+ return ret;
+}
+
+static int vfio_user_dma_unmap(VFIOProxy *proxy,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap, bool will_commit)
+{
+ struct {
+ VFIOUserDMAUnmap msg;
+ VFIOUserBitmap bitmap;
+ } *msgp = NULL;
+ int msize, rsize;
+ bool blocking = !will_commit;
+
+ if (bitmap == NULL &&
+ (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)) {
+ error_printf("vfio_user_dma_unmap mismatched flags and bitmap\n");
+ return -EINVAL;
+ }
+
+ /*
+ * If a dirty bitmap is returned, allocate extra space for it
+ * and block for reply even in the will_commit case.
+ * Otherwise, the unmap request can be sent without waiting.
+ */
+ if (bitmap != NULL) {
+ blocking = true;
+ msize = sizeof(*msgp);
+ rsize = msize + bitmap->size;
+ msgp = g_malloc0(rsize);
+ msgp->bitmap.pgsize = bitmap->pgsize;
+ msgp->bitmap.size = bitmap->size;
+ } else {
+ msize = rsize = sizeof(VFIOUserDMAUnmap);
+ msgp = g_malloc0(rsize);
+ }
+
+ vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DMA_UNMAP, msize, 0);
+ msgp->msg.argsz = rsize - sizeof(VFIOUserHdr);
+ msgp->msg.flags = unmap->flags;
+ msgp->msg.iova = unmap->iova;
+ msgp->msg.size = unmap->size;
+
+ if (blocking) {
+ vfio_user_send_wait(proxy, &msgp->msg.hdr, NULL, rsize, will_commit);
+ if (msgp->msg.hdr.flags & VFIO_USER_ERROR) {
+ int ret = -msgp->msg.hdr.error_reply;
+
+ g_free(msgp);
+ return ret;
+ }
+ if (bitmap != NULL) {
+ memcpy(bitmap->data, &msgp->bitmap.data, bitmap->size);
+ }
+ g_free(msgp);
+ } else {
+ vfio_user_send_nowait(proxy, &msgp->msg.hdr, NULL, rsize);
+ }
+
+ return 0;
+}
+
static int vfio_user_get_info(VFIOProxy *proxy, struct vfio_device_info *info)
{
VFIOUserDeviceInfo msg;
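A caller-side sketch of the unmap-with-bitmap case above, assuming a 4KiB page size and a caller-allocated buffer sized at one bit per page, per the struct vfio_bitmap convention:

static int unmap_get_dirty(VFIOProxy *proxy, uint64_t iova, uint64_t size,
                           uint64_t *bitmap_buf)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP,
        .iova = iova,
        .size = size,
    };
    struct vfio_bitmap bitmap = {
        .pgsize = 4096,
        .size = DIV_ROUND_UP(size / 4096, 8), /* one bit per 4KiB page */
        .data = bitmap_buf,
    };

    /* a bitmap reply forces the blocking path even inside a transaction */
    return vfio_user_dma_unmap(proxy, &unmap, &bitmap, false);
}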
@@ -1251,5 +1432,41 @@ VFIODevIO vfio_dev_io_sock = {
};
+static int vfio_user_io_dma_map(VFIOContainer *container, MemoryRegion *mr,
+ struct vfio_iommu_type1_dma_map *map)
+{
+ int fd = memory_region_get_fd(mr);
+
+ /*
+ * map->vaddr enters as a QEMU process address;
+ * make it either a file offset for fd-backed areas or 0.
+ */
+ if (fd != -1) {
+ void *addr = (void *)(uintptr_t)map->vaddr;
+
+ map->vaddr = qemu_ram_block_host_offset(mr->ram_block, addr);
+ } else {
+ map->vaddr = 0;
+ }
+
+ return vfio_user_dma_map(container->proxy, map, fd, container->async_ops);
+}
+
+static int vfio_user_io_dma_unmap(VFIOContainer *container,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap)
+{
+ return vfio_user_dma_unmap(container->proxy, unmap, bitmap,
+ container->async_ops);
+}
+
+static void vfio_user_io_wait_commit(VFIOContainer *container)
+{
+ vfio_user_wait_reqs(container->proxy);
+}
+
VFIOContIO vfio_cont_io_sock = {
+ .dma_map = vfio_user_io_dma_map,
+ .dma_unmap = vfio_user_io_dma_unmap,
+ .wait_commit = vfio_user_io_wait_commit,
};
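Putting the pieces together, the container-level flow through the table above is CONT_DMA_MAP() -> vfio_user_io_dma_map() -> vfio_user_dma_map(), with the vaddr rewritten to an fd offset on the way out. An illustrative caller only, condensing what vfio_dma_map() and vfio_listener_commit() do between them; real callers let the memory listener drive this:

static int map_one(VFIOContainer *container, MemoryRegion *mr, hwaddr iova,
                   ram_addr_t size, void *vaddr, bool readonly)
{
    struct vfio_iommu_type1_dma_map map = {
        .argsz = sizeof(map),
        .flags = VFIO_DMA_MAP_FLAG_READ |
                 (readonly ? 0 : VFIO_DMA_MAP_FLAG_WRITE),
        .vaddr = (uintptr_t)vaddr,
        .iova = iova,
        .size = size,
    };
    int ret = CONT_DMA_MAP(container, mr, &map); /* nowait if async_ops */

    CONT_WAIT_COMMIT(container); /* no-op for ioctl; reaps the reply for sockets */
    return ret;
}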