@@ -82,6 +82,31 @@ typedef struct {
#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024)
+/*
+ * VFIO_USER_DMA_MAP
+ * imported from struct vfio_iommu_type1_dma_map
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t offset; /* FD offset */
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAMap;
+
+/*
+ * VFIO_USER_DMA_UNMAP
+ * imported from struct vfio_iommu_type1_dma_unmap
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAUnmap;
+
/*
* VFIO_USER_DEVICE_GET_INFO
 * imported from struct vfio_device_info
@@ -146,4 +171,11 @@ typedef struct {
char data[];
} VFIOUserRegionRW;
+/* imported from struct vfio_bitmap */
+typedef struct {
+ uint64_t pgsize;
+ uint64_t size;
+ char data[];
+} VFIOUserBitmap;
+
#endif /* VFIO_USER_PROTOCOL_H */
@@ -71,6 +71,11 @@ void vfio_user_set_reqhandler(VFIODevice *vbasdev,
void *reqarg);
void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp);
+int vfio_user_dma_map(VFIOProxy *proxy, struct vfio_iommu_type1_dma_map *map,
+ VFIOUserFDs *fds, bool will_commit);
+int vfio_user_dma_unmap(VFIOProxy *proxy,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap, bool will_commit);
int vfio_user_get_info(VFIODevice *vbasedev);
int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info *info, VFIOUserFDs *fds);
@@ -80,5 +85,6 @@ int vfio_user_region_read(VFIODevice *vbasedev, uint32_t index, uint64_t offset,
uint32_t count, void *data);
int vfio_user_region_write(VFIODevice *vbasedev, uint32_t index,
uint64_t offset, uint32_t count, void *data);
+void vfio_user_drain_reqs(VFIOProxy *proxy);
#endif /* VFIO_USER_H */
@@ -87,6 +87,7 @@ typedef struct VFIOContainer {
Error *error;
bool initialized;
bool dirty_pages_supported;
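+    /* set between the memory listener's begin() and commit() callbacks */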
+ bool will_commit;
uint64_t dirty_pgsizes;
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
@@ -427,6 +427,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
struct vfio_iommu_type1_dma_unmap *unmap;
struct vfio_bitmap *bitmap;
uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size;
+ bool will_commit = container->will_commit;
int ret;
unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
@@ -460,7 +461,11 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
goto unmap_exit;
}
- ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ if (container->proxy != NULL) {
+ ret = vfio_user_dma_unmap(container->proxy, unmap, bitmap, will_commit);
+ } else {
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ }
if (!ret) {
cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
iotlb->translated_addr, pages);
@@ -487,12 +492,17 @@ static int vfio_dma_unmap(VFIOContainer *container,
.iova = iova,
.size = size,
};
+ bool will_commit = container->will_commit;
if (iotlb && container->dirty_pages_supported &&
vfio_devices_all_running_and_saving(container)) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
+ if (container->proxy != NULL) {
+ return vfio_user_dma_unmap(container->proxy, &unmap, NULL, will_commit);
+ }
+
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
/*
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -519,7 +529,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
return 0;
}
-static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
+static int vfio_dma_map(VFIOContainer *container, MemoryRegion *mr, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
struct vfio_iommu_type1_dma_map map = {
@@ -529,11 +539,30 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
.iova = iova,
.size = size,
};
+ bool will_commit = container->will_commit;
if (!readonly) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
+ if (container->proxy != NULL) {
+ VFIOUserFDs fds;
+ int fd;
+
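+        /*
+         * If the region is backed by a file descriptor and the proxy is not
+         * in secure mode, pass the fd so the server can map the memory
+         * directly; vaddr then carries the offset into that fd.  Otherwise
+         * send the request without an fd and a zero offset.
+         */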
+ fd = memory_region_get_fd(mr);
+ if (fd != -1 && !(container->proxy->flags & VFIO_PROXY_SECURE)) {
+ fds.send_fds = 1;
+ fds.recv_fds = 0;
+ fds.fds = &fd;
+ map.vaddr = qemu_ram_block_host_offset(mr->ram_block, vaddr);
+
+ return vfio_user_dma_map(container->proxy, &map, &fds, will_commit);
+ } else {
+ map.vaddr = 0;
+ return vfio_user_dma_map(container->proxy, &map, NULL, will_commit);
+ }
+ }
+
/*
* Try the mapping, if it fails with EBUSY, unmap the region and try
* again. This shouldn't be necessary, but we sometimes see it in
@@ -602,7 +631,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
/* Called with rcu_read_lock held. */
static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only)
+ ram_addr_t *ram_addr, bool *read_only,
+ MemoryRegion **mrp)
{
MemoryRegion *mr;
hwaddr xlat;
@@ -683,6 +713,10 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*read_only = !writable || mr->readonly;
}
+ if (mrp != NULL) {
+ *mrp = mr;
+ }
+
return true;
}
@@ -690,6 +724,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainer *container = giommu->container;
+ MemoryRegion *mr;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
void *vaddr;
int ret;
@@ -708,7 +743,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) {
+ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &mr)) {
goto out;
}
/*
@@ -718,7 +753,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
* of vaddr will always be there, even if the memory object is
* destroyed and its backing memory munmap-ed.
*/
- ret = vfio_dma_map(container, iova,
+ ret = vfio_dma_map(container, mr, iova,
iotlb->addr_mask + 1, vaddr,
read_only);
if (ret) {
@@ -780,7 +815,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
section->offset_within_address_space;
vaddr = memory_region_get_ram_ptr(section->mr) + start;
- ret = vfio_dma_map(vrdl->container, iova, next - start,
+ ret = vfio_dma_map(vrdl->container, section->mr, iova, next - start,
vaddr, section->readonly);
if (ret) {
/* Rollback */
@@ -888,6 +923,24 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
g_free(vrdl);
}
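+/*
+ * Called at the start of a memory listener transaction.  DMA map and
+ * unmap requests issued before the matching commit() can be sent to a
+ * vfio-user server asynchronously; vfio_listener_commit() waits for
+ * any outstanding ones to complete.
+ */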
+static void vfio_listener_begin(MemoryListener *listener)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+    container->will_commit = true;
+}
+
+static void vfio_listener_commit(MemoryListener *listener)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+
+ /* wait for any async requests sent during the transaction */
+ if (container->proxy != NULL) {
+ vfio_user_drain_reqs(container->proxy);
+ }
+    container->will_commit = false;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1080,7 +1133,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
}
- ret = vfio_dma_map(container, iova, int128_get64(llsize),
+ ret = vfio_dma_map(container, section->mr, iova, int128_get64(llsize),
vaddr, section->readonly);
if (ret) {
error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
@@ -1346,7 +1399,7 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
+ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, NULL)) {
int ret;
ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
@@ -1463,6 +1516,8 @@ static void vfio_listener_log_sync(MemoryListener *listener,
}
static const MemoryListener vfio_memory_listener = {
+ .begin = vfio_listener_begin,
+ .commit = vfio_listener_commit,
.region_add = vfio_listener_region_add,
.region_del = vfio_listener_region_del,
.log_global_start = vfio_listener_log_global_start,
@@ -408,6 +408,47 @@ static void vfio_user_send_recv(VFIOProxy *proxy, VFIOUserHdr *msg,
}
}
+void vfio_user_drain_reqs(VFIOProxy *proxy)
+{
+ VFIOUserReply *reply;
+    bool iolock = false;
+
+ /*
+ * Any DMA map/unmap requests sent in the middle
+ * of a memory region transaction were sent async.
+ * Wait for them here.
+ */
+ QEMU_LOCK_GUARD(&proxy->lock);
+ if (proxy->last_nowait != NULL) {
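+        /*
+         * Only the most recent async request is tracked in last_nowait;
+         * assuming replies are processed in order, waiting for it also
+         * covers any earlier async requests.
+         */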
+ iolock = qemu_mutex_iothread_locked();
+ if (iolock) {
+ qemu_mutex_unlock_iothread();
+ }
+
+ reply = proxy->last_nowait;
+ reply->nowait = 0;
+ while (reply->complete == 0) {
+ if (!qemu_cond_timedwait(&reply->cv, &proxy->lock, wait_time)) {
+                error_printf("vfio_user_drain_reqs - timed out\n");
+ break;
+ }
+ }
+
+ if (reply->msg->flags & VFIO_USER_ERROR) {
+            error_printf("vfio_user_drain_reqs: error reply on async request ");
+ error_printf("command %x error %s\n", reply->msg->command,
+ strerror(reply->msg->error_reply));
+ }
+ proxy->last_nowait = NULL;
+ g_free(reply->msg);
+ QTAILQ_INSERT_HEAD(&proxy->free, reply, next);
+ }
+
+ if (iolock) {
+ qemu_mutex_lock_iothread();
+ }
+}
+
static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
uint32_t size, uint32_t flags)
{
@@ -715,6 +756,89 @@ int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp)
return 0;
}
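+/*
+ * Send a VFIO_USER_DMA_MAP request for a guest memory region.  If fds
+ * are supplied, the region's file descriptor is passed to the server
+ * along with the message.
+ */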
+int vfio_user_dma_map(VFIOProxy *proxy, struct vfio_iommu_type1_dma_map *map,
+ VFIOUserFDs *fds, bool will_commit)
+{
+ VFIOUserDMAMap *msgp = g_malloc(sizeof(*msgp));
+ int ret, flags;
+
+ /* commit will wait, so send async without dropping BQL */
+ flags = will_commit ? (NOIOLOCK | NOWAIT) : 0;
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
+ msgp->argsz = map->argsz;
+ msgp->flags = map->flags;
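+    /* the caller stores the region's fd offset (or 0) in map->vaddr; see vfio_dma_map() */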
+ msgp->offset = map->vaddr;
+ msgp->iova = map->iova;
+ msgp->size = map->size;
+
+ vfio_user_send_recv(proxy, &msgp->hdr, fds, 0, flags);
+ ret = (msgp->hdr.flags & VFIO_USER_ERROR) ? -msgp->hdr.error_reply : 0;
+
+ if (!(flags & NOWAIT)) {
+ g_free(msgp);
+ }
+ return ret;
+}
+
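+/*
+ * Send a VFIO_USER_DMA_UNMAP request.  If the caller passes a
+ * vfio_bitmap, the server's dirty page bitmap for the range is
+ * copied into it.
+ */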
+int vfio_user_dma_unmap(VFIOProxy *proxy,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap, bool will_commit)
+{
+ struct {
+ VFIOUserDMAUnmap msg;
+ VFIOUserBitmap bitmap;
+ } *msgp = NULL;
+ int msize, rsize, flags;
+
+ if (bitmap == NULL && (unmap->flags &
+ VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)) {
+ error_printf("vfio_user_dma_unmap mismatched flags and bitmap\n");
+ return -EINVAL;
+ }
+
+ /* can't drop BQL until commit */
+ flags = will_commit ? NOIOLOCK : 0;
+
+ /*
+     * If a dirty bitmap is returned, allocate extra space for it;
+     * otherwise, just send the unmap request.
+ */
+ if (bitmap != NULL) {
+ msize = sizeof(*msgp);
+ rsize = msize + bitmap->size;
+ msgp = g_malloc0(rsize);
+ msgp->bitmap.pgsize = bitmap->pgsize;
+ msgp->bitmap.size = bitmap->size;
+ } else {
+ /* can only send async if no bitmap returned */
+ flags |= will_commit ? NOWAIT : 0;
+ msize = rsize = sizeof(VFIOUserDMAUnmap);
+ msgp = g_malloc0(rsize);
+ }
+
+    vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DMA_UNMAP, msize, 0);
+ msgp->msg.argsz = unmap->argsz;
+ msgp->msg.flags = unmap->flags;
+ msgp->msg.iova = unmap->iova;
+ msgp->msg.size = unmap->size;
+
+ vfio_user_send_recv(proxy, &msgp->msg.hdr, NULL, rsize, flags);
+ if (msgp->msg.hdr.flags & VFIO_USER_ERROR) {
+        int ret = -msgp->msg.hdr.error_reply;
+
+        g_free(msgp);
+        return ret;
+ }
+
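+    /* copy the dirty bitmap returned in the reply to the caller's buffer */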
+ if (bitmap != NULL) {
+ memcpy(bitmap->data, &msgp->bitmap.data, bitmap->size);
+ }
+ if (!(flags & NOWAIT)) {
+ g_free(msgp);
+ }
+
+ return 0;
+}
+
int vfio_user_get_info(VFIODevice *vbasedev)
{
VFIOUserDeviceInfo msg;