diff mbox series

[v2,11/23] vfio-user: get region info

Message ID f8cb1656f09ac50ab5ba68ca40be700fc2d66e59.1675228037.git.john.g.johnson@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user client | expand

Commit Message

John Johnson Feb. 2, 2023, 5:55 a.m. UTC
Add per-region FD to support mmap() of remote device regions

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user-protocol.h       | 14 +++++++++
 hw/vfio/user.h                |  2 ++
 include/hw/vfio/vfio-common.h |  5 +++-
 hw/vfio/ap.c                  |  1 +
 hw/vfio/ccw.c                 |  1 +
 hw/vfio/common.c              | 31 ++++++++++++++++++--
 hw/vfio/pci.c                 |  1 +
 hw/vfio/platform.c            |  1 +
 hw/vfio/user-pci.c            |  2 ++
 hw/vfio/user.c                | 68 +++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events          |  1 +
 11 files changed, 123 insertions(+), 4 deletions(-)

Comments

Alex Williamson Feb. 3, 2023, 11:11 p.m. UTC | #1
On Wed,  1 Feb 2023 21:55:47 -0800
John Johnson <john.g.johnson@oracle.com> wrote:
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 792e247..d26b325 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1584,6 +1584,11 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
>      region->size = info->size;
>      region->fd_offset = info->offset;
>      region->nr = index;
> +    if (vbasedev->regfds != NULL) {
> +        region->fd = vbasedev->regfds[index];
> +    } else {
> +        region->fd = vbasedev->fd;
> +    }
>  
>      if (region->size) {
>          region->mem = g_new0(MemoryRegion, 1);
> @@ -1635,7 +1640,7 @@ int vfio_region_mmap(VFIORegion *region)
>  
>      for (i = 0; i < region->nr_mmaps; i++) {
>          region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
> -                                     MAP_SHARED, region->vbasedev->fd,
> +                                     MAP_SHARED, region->fd,
>                                       region->fd_offset +
>                                       region->mmaps[i].offset);
>          if (region->mmaps[i].mmap == MAP_FAILED) {
> @@ -2442,10 +2447,17 @@ void vfio_put_base_device(VFIODevice *vbasedev)
>          int i;
>  
>          for (i = 0; i < vbasedev->num_regions; i++) {
> +            if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) {
> +                close(vbasedev->regfds[i]);
> +            }

There's an exit in vfio_get_region_info() where regfds is allocated and
regfds[i] could still be zero-initialized, i.e. if .get_region_info()
fails.  vfio_get_all_regions() ignores the return value of
vfio_get_region_info(), so we could end up calling close(0) here.  Thanks,

Alex
diff mbox series

Patch

diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 5f9ef17..6f70a48 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -125,4 +125,18 @@  typedef struct {
     uint32_t num_irqs;
 } VFIOUserDeviceInfo;
 
+/*
+ * VFIO_USER_DEVICE_GET_REGION_INFO message: a VFIOUserHdr followed by
+ * the fields imported from struct vfio_region_info
+ */
+typedef struct {
+    VFIOUserHdr hdr;
+    uint32_t argsz;
+    uint32_t flags;
+    uint32_t index;
+    uint32_t cap_offset;
+    uint64_t size;
+    uint64_t offset;
+} VFIOUserRegionInfo;
+
 #endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index d148661..e6485dc 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -93,4 +93,6 @@  void vfio_user_set_handler(VFIODevice *vbasedev,
 int vfio_user_get_device(VFIODevice *vbasedev, Error **errp);
 int vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
 
+extern VFIODeviceIO vfio_dev_io_sock;
+
 #endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0962e37..9fb4c80 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@  typedef struct VFIORegion {
     uint32_t nr_mmaps;
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
+    int fd; /* fd to mmap() region */
 } VFIORegion;
 
 typedef struct VFIOMigration {
@@ -140,6 +141,7 @@  typedef struct VFIODevice {
     bool no_mmap;
     bool ram_block_discard_allowed;
     bool enable_migration;
+    bool use_regfds;
     VFIODeviceOps *ops;
     VFIODeviceIO *io;
     unsigned int num_irqs;
@@ -150,6 +152,7 @@  typedef struct VFIODevice {
     OnOffAuto pre_copy_dirty_page_tracking;
     VFIOUserProxy *proxy;
     struct vfio_region_info **regions;
+    int *regfds;
 } VFIODevice;
 
 struct VFIODeviceOps {
@@ -171,7 +174,7 @@  struct VFIODeviceOps {
  */
 struct VFIODeviceIO {
     int (*get_region_info)(VFIODevice *vdev,
-                           struct vfio_region_info *info);
+                           struct vfio_region_info *info, int *fd);
     int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
     int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
     int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index c6638d5..06d745f 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -103,6 +103,7 @@  static void vfio_ap_realize(DeviceState *dev, Error **errp)
     vapdev->vdev.name = g_strdup_printf("%s", mdevid);
     vapdev->vdev.dev = dev;
     vapdev->vdev.io = &vfio_dev_io_ioctl;
+    vapdev->vdev.use_regfds = false;
 
     /*
      * vfio-ap devices operate in a way compatible with discarding of
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index e4d840d..00605bd 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -615,6 +615,7 @@  static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
     vcdev->vdev.name = name;
     vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
     vcdev->vdev.io = &vfio_dev_io_ioctl;
+    vcdev->vdev.use_regfds = false;
 
     return;
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 792e247..d26b325 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1584,6 +1584,11 @@  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
     region->size = info->size;
     region->fd_offset = info->offset;
     region->nr = index;
+    if (vbasedev->regfds != NULL) {
+        region->fd = vbasedev->regfds[index];
+    } else {
+        region->fd = vbasedev->fd;
+    }
 
     if (region->size) {
         region->mem = g_new0(MemoryRegion, 1);
@@ -1635,7 +1640,7 @@  int vfio_region_mmap(VFIORegion *region)
 
     for (i = 0; i < region->nr_mmaps; i++) {
         region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
-                                     MAP_SHARED, region->vbasedev->fd,
+                                     MAP_SHARED, region->fd,
                                      region->fd_offset +
                                      region->mmaps[i].offset);
         if (region->mmaps[i].mmap == MAP_FAILED) {
@@ -2442,10 +2447,17 @@  void vfio_put_base_device(VFIODevice *vbasedev)
         int i;
 
         for (i = 0; i < vbasedev->num_regions; i++) {
+            if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) {
+                close(vbasedev->regfds[i]);
+            }
             g_free(vbasedev->regions[i]);
         }
         g_free(vbasedev->regions);
         vbasedev->regions = NULL;
+        if (vbasedev->regfds != NULL) {
+            g_free(vbasedev->regfds);
+            vbasedev->regfds = NULL;
+        }
     }
 
     if (!vbasedev->group) {
@@ -2461,12 +2473,16 @@  int vfio_get_region_info(VFIODevice *vbasedev, int index,
                          struct vfio_region_info **info)
 {
     size_t argsz = sizeof(struct vfio_region_info);
+    int fd = -1;
     int ret;
 
     /* create region cache */
     if (vbasedev->regions == NULL) {
         vbasedev->regions = g_new0(struct vfio_region_info *,
                                    vbasedev->num_regions);
+        if (vbasedev->use_regfds) {
+            vbasedev->regfds = g_new0(int, vbasedev->num_regions);
+        }
     }
     /* check cache */
     if (vbasedev->regions[index] != NULL) {
@@ -2480,7 +2496,7 @@  int vfio_get_region_info(VFIODevice *vbasedev, int index,
 retry:
     (*info)->argsz = argsz;
 
-    ret = vbasedev->io->get_region_info(vbasedev, *info);
+    ret = vbasedev->io->get_region_info(vbasedev, *info, &fd);
     if (ret != 0) {
         g_free(*info);
         *info = NULL;
@@ -2490,12 +2506,19 @@  retry:
     if ((*info)->argsz > argsz) {
         argsz = (*info)->argsz;
         *info = g_realloc(*info, argsz);
+        if (fd != -1) {
+            close(fd);
+            fd = -1;
+        }
 
         goto retry;
     }
 
     /* fill cache */
     vbasedev->regions[index] = *info;
+    if (vbasedev->regfds != NULL) {
+        vbasedev->regfds[index] = fd;
+    }
 
     return 0;
 }
@@ -2646,10 +2669,12 @@  int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
  */
 
 static int vfio_io_get_region_info(VFIODevice *vbasedev,
-                                   struct vfio_region_info *info)
+                                   struct vfio_region_info *info,
+                                   int *fd)
 {
     int ret;
 
+    *fd = -1;
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
 
     return ret < 0 ? -errno : ret;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a8bc0ea..935d247 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2908,6 +2908,7 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);
     vbasedev->io = &vfio_dev_io_ioctl;
+    vbasedev->use_regfds = false;
 
     tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
     len = readlink(tmp, group_path, sizeof(group_path));
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 222405e..8ddfcca 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -622,6 +622,7 @@  static void vfio_platform_realize(DeviceState *dev, Error **errp)
     vbasedev->dev = dev;
     vbasedev->ops = &vfio_platform_ops;
     vbasedev->io = &vfio_dev_io_ioctl;
+    vbasedev->use_regfds = false;
 
     qemu_mutex_init(&vdev->intp_mutex);
 
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index e5a9450..09c6c98 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -116,6 +116,8 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     vbasedev->ops = &vfio_user_pci_ops;
     vbasedev->type = VFIO_DEVICE_TYPE_PCI;
     vbasedev->dev = DEVICE(vdev);
+    vbasedev->io = &vfio_dev_io_sock;
+    vbasedev->use_regfds = true;
 
     ret = vfio_user_get_device(vbasedev, errp);
     if (ret) {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index d0ec14c..a05ba80 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -1143,3 +1143,71 @@  static int vfio_user_get_info(VFIOUserProxy *proxy,
     memcpy(info, &msg.argsz, argsz);
     return 0;
 }
+
+static int vfio_user_get_region_info(VFIOUserProxy *proxy,
+                                     struct vfio_region_info *info,
+                                     VFIOUserFDs *fds)
+{
+    g_autofree VFIOUserRegionInfo *msgp = NULL;
+    uint32_t size;
+
+    /* data returned can be larger than vfio_region_info */
+    if (info->argsz < sizeof(*info)) {
+        error_printf("vfio_user_get_region_info argsz too small\n");
+        return -E2BIG;
+    }
+    if (fds != NULL && fds->send_fds != 0) {
+        error_printf("vfio_user_get_region_info can't send FDs\n");
+        return -EINVAL;
+    }
+    /* buffer = header + argsz bytes (NOTE: the sum is not overflow-checked) */
+    size = info->argsz + sizeof(VFIOUserHdr);
+    msgp = g_malloc0(size);
+
+    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+                          sizeof(*msgp), 0);
+    msgp->argsz = info->argsz;
+    msgp->index = info->index;
+
+    vfio_user_send_wait(proxy, &msgp->hdr, fds, size, false);
+    if (msgp->hdr.flags & VFIO_USER_ERROR) {
+        return -msgp->hdr.error_reply;
+    }
+    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
+    /* copy everything after the header back into the caller's info */
+    memcpy(info, &msgp->argsz, info->argsz);
+    return 0;
+}
+
+
+/*
+ * Socket-based io_ops
+ */
+
+static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
+                                        struct vfio_region_info *info,
+                                        int *fd)
+{
+    int ret;
+    VFIOUserFDs fds = { 0, 1, fd};
+
+    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
+    if (ret) {
+        return ret;
+    }
+    /* returned index must name an existing region: 0 .. num_regions - 1 */
+    if (info->index >= vbasedev->num_regions) {
+        return -EINVAL;
+    }
+    /* cap_offset in valid area */
+    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
+        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+VFIODeviceIO vfio_dev_io_sock = { /* installed by vfio_user_pci_realize() */
+    .get_region_info = vfio_user_io_get_region_info,
+};
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index ff903c0..939113a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -174,3 +174,4 @@  vfio_user_recv_request(uint16_t cmd) " command 0x%x"
 vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x"
 vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s"
 vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d"
+vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index %d flags 0x%x size 0x%"PRIx64