diff mbox series

[RFC,v2,08/16] vfio-user: get region info

Message ID d442105953151559982c8b1a753d847fb89da3ba.1629131628.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user implementation | expand

Commit Message

Elena Ufimtseva Aug. 16, 2021, 4:42 p.m. UTC
From: John Johnson <john.g.johnson@oracle.com>

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user-protocol.h       | 14 +++++++
 hw/vfio/user.h                |  2 +
 include/hw/vfio/vfio-common.h |  3 ++
 hw/vfio/common.c              | 76 ++++++++++++++++++++++++++++++++++-
 hw/vfio/user.c                | 33 +++++++++++++++
 5 files changed, 126 insertions(+), 2 deletions(-)

Comments

Stefan Hajnoczi Sept. 7, 2021, 2:31 p.m. UTC | #1
On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote:
> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>      return true;
>  }
>  
> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
> +{
> +    struct vfio_region_info *info;
> +
> +    if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
> +        vfio_get_region_info(vbasedev, index, &info);
> +    }

Maybe this will be called from other places in the future, but the
vfio_region_setup() caller below already invoked vfio_get_region_info()
so I'm not sure it's necessary to do this again?

Perhaps add an int *remfd argument to vfio_get_region_info(). That way
vfio_get_region_info_remfd() isn't necessary.

> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
>                           struct vfio_region_info **info)
>  {
>      size_t argsz = sizeof(struct vfio_region_info);
> +    int fd = -1;
> +    int ret;
> +
> +    /* create region cache */
> +    if (vbasedev->regions == NULL) {
> +        vbasedev->regions = g_new0(struct vfio_region_info *,
> +                                   vbasedev->num_regions);
> +        if (vbasedev->proxy != NULL) {
> +            vbasedev->regfds = g_new0(int, vbasedev->num_regions);
> +        }
> +    }
> +    /* check cache */
> +    if (vbasedev->regions[index] != NULL) {
> +        *info = g_malloc0(vbasedev->regions[index]->argsz);
> +        memcpy(*info, vbasedev->regions[index],
> +               vbasedev->regions[index]->argsz);
> +        return 0;
> +    }

Why is it necessary to introduce a cache? Is it to avoid passing the
same fd multiple times?

>  
>      *info = g_malloc0(argsz);
>  
> @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
>  retry:
>      (*info)->argsz = argsz;
>  
> -    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
> +    if (vbasedev->proxy != NULL) {
> +        VFIOUserFDs fds = { 0, 1, &fd};
> +
> +        ret = vfio_user_get_region_info(vbasedev, index, *info, &fds);
> +    } else {
> +        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info);
> +        if (ret < 0) {
> +            ret = -errno;
> +        }
> +    }
> +    if (ret != 0) {
>          g_free(*info);
>          *info = NULL;
>          return -errno;
> @@ -2426,10 +2486,22 @@ retry:
>      if ((*info)->argsz > argsz) {
>          argsz = (*info)->argsz;
>          *info = g_realloc(*info, argsz);
> +        if (fd != -1) {
> +            close(fd);
> +            fd = -1;
> +        }
>  
>          goto retry;
>      }
>  
> +    /* fill cache */
> +    vbasedev->regions[index] = g_malloc0(argsz);
> +    memcpy(vbasedev->regions[index], *info, argsz);
> +    *vbasedev->regions[index] = **info;

The previous line already copied the contents of *info. What is the
purpose of this assignment?

> +    if (vbasedev->regfds != NULL) {
> +        vbasedev->regfds[index] = fd;
> +    }
> +
>      return 0;
>  }
>  
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index b584b8e0f2..91b51f37df 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev)
>      vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET);
>      return 0;
>  }
> +
> +int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
> +                              struct vfio_region_info *info, VFIOUserFDs *fds)
> +{
> +    g_autofree VFIOUserRegionInfo *msgp = NULL;
> +    int size;

Please use uint32_t size instead of int size to prevent possible
signedness issues:
- VFIOUserRegionInfo->argsz is uint32_t.
- sizeof(VFIOUserHdr) is size_t.
- The vfio_user_request_msg() size argument is uint32_t.
John Johnson Sept. 9, 2021, 5:35 a.m. UTC | #2
> On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote:
>> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
>>     return true;
>> }
>> 
>> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
>> +{
>> +    struct vfio_region_info *info;
>> +
>> +    if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
>> +        vfio_get_region_info(vbasedev, index, &info);
>> +    }
> 
> Maybe this will be called from other places in the future, but the
> vfio_region_setup() caller below already invoked vfio_get_region_info()
> so I'm not sure it's necessary to do this again?
> 
> Perhaps add an int *remfd argument to vfio_get_region_info(). That way
> vfio_get_region_info_remfd() isn't necessary.
> 

	I think they could be combined, but the region capabilities are
retrieved with separate calls to vfio_get_region_info_cap(), so I followed
that precedent.


>> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
>>                          struct vfio_region_info **info)
>> {
>>     size_t argsz = sizeof(struct vfio_region_info);
>> +    int fd = -1;
>> +    int ret;
>> +
>> +    /* create region cache */
>> +    if (vbasedev->regions == NULL) {
>> +        vbasedev->regions = g_new0(struct vfio_region_info *,
>> +                                   vbasedev->num_regions);
>> +        if (vbasedev->proxy != NULL) {
>> +            vbasedev->regfds = g_new0(int, vbasedev->num_regions);
>> +        }
>> +    }
>> +    /* check cache */
>> +    if (vbasedev->regions[index] != NULL) {
>> +        *info = g_malloc0(vbasedev->regions[index]->argsz);
>> +        memcpy(*info, vbasedev->regions[index],
>> +               vbasedev->regions[index]->argsz);
>> +        return 0;
>> +    }
> 
> Why is it necessary to introduce a cache? Is it to avoid passing the
> same fd multiple times?
> 

	Yes.  For polled regions, the server returns an FD with each
message, so there would be an FD leak if I didn’t check that the region
hadn’t already returned one.  Since I had to cache the FD anyway, I
cached the whole struct.


>> 
>>     *info = g_malloc0(argsz);
>> 
>> @@ -2417,7 +2467,17 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
>> retry:
>>     (*info)->argsz = argsz;
>> 
>> -    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
>> +    if (vbasedev->proxy != NULL) {
>> +        VFIOUserFDs fds = { 0, 1, &fd};
>> +
>> +        ret = vfio_user_get_region_info(vbasedev, index, *info, &fds);
>> +    } else {
>> +        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info);
>> +        if (ret < 0) {
>> +            ret = -errno;
>> +        }
>> +    }
>> +    if (ret != 0) {
>>         g_free(*info);
>>         *info = NULL;
>>         return -errno;
>> @@ -2426,10 +2486,22 @@ retry:
>>     if ((*info)->argsz > argsz) {
>>         argsz = (*info)->argsz;
>>         *info = g_realloc(*info, argsz);
>> +        if (fd != -1) {
>> +            close(fd);
>> +            fd = -1;
>> +        }
>> 
>>         goto retry;
>>     }
>> 
>> +    /* fill cache */
>> +    vbasedev->regions[index] = g_malloc0(argsz);
>> +    memcpy(vbasedev->regions[index], *info, argsz);
>> +    *vbasedev->regions[index] = **info;
> 
> The previous line already copied the contents of *info. What is the
> purpose of this assignment?
> 

	That might be a mis-merge.  The struct assignment was a bug
fixed several revs ago with the memcpy() call.


>> +    if (vbasedev->regfds != NULL) {
>> +        vbasedev->regfds[index] = fd;
>> +    }
>> +
>>     return 0;
>> }
>> 
>> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
>> index b584b8e0f2..91b51f37df 100644
>> --- a/hw/vfio/user.c
>> +++ b/hw/vfio/user.c
>> @@ -734,3 +734,36 @@ int vfio_user_get_info(VFIODevice *vbasedev)
>>     vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET);
>>     return 0;
>> }
>> +
>> +int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
>> +                              struct vfio_region_info *info, VFIOUserFDs *fds)
>> +{
>> +    g_autofree VFIOUserRegionInfo *msgp = NULL;
>> +    int size;
> 
> Please use uint32_t size instead of int size to prevent possible
> signedness issues:
> - VFIOUserRegionInfo->argsz is uint32_t.
> - sizeof(VFIOUserHdr) is size_t.
> - The vfio_user_request_msg() size argument is uint32_t.

	OK

		JJ
Stefan Hajnoczi Sept. 9, 2021, 5:59 a.m. UTC | #3
On Thu, Sep 09, 2021 at 05:35:40AM +0000, John Johnson wrote:
> 
> 
> > On Sep 7, 2021, at 7:31 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> > On Mon, Aug 16, 2021 at 09:42:41AM -0700, Elena Ufimtseva wrote:
> >> @@ -1514,6 +1515,16 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
> >>     return true;
> >> }
> >> 
> >> +static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
> >> +{
> >> +    struct vfio_region_info *info;
> >> +
> >> +    if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
> >> +        vfio_get_region_info(vbasedev, index, &info);
> >> +    }
> > 
> > Maybe this will be called from other places in the future, but the
> > vfio_region_setup() caller below already invoked vfio_get_region_info()
> > so I'm not sure it's necessary to do this again?
> > 
> > Perhaps add an int *remfd argument to vfio_get_region_info(). That way
> > vfio_get_region_info_remfd() isn't necessary.
> > 
> 
> 	I think they could be combined, but the region capabilities are
> retrieved with separate calls to vfio_get_region_info_cap(), so I followed
> that precedent.
> 
> 
> >> @@ -2410,6 +2442,24 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
> >>                          struct vfio_region_info **info)
> >> {
> >>     size_t argsz = sizeof(struct vfio_region_info);
> >> +    int fd = -1;
> >> +    int ret;
> >> +
> >> +    /* create region cache */
> >> +    if (vbasedev->regions == NULL) {
> >> +        vbasedev->regions = g_new0(struct vfio_region_info *,
> >> +                                   vbasedev->num_regions);
> >> +        if (vbasedev->proxy != NULL) {
> >> +            vbasedev->regfds = g_new0(int, vbasedev->num_regions);
> >> +        }
> >> +    }
> >> +    /* check cache */
> >> +    if (vbasedev->regions[index] != NULL) {
> >> +        *info = g_malloc0(vbasedev->regions[index]->argsz);
> >> +        memcpy(*info, vbasedev->regions[index],
> >> +               vbasedev->regions[index]->argsz);
> >> +        return 0;
> >> +    }
> > 
> > Why is it necessary to introduce a cache? Is it to avoid passing the
> > same fd multiple times?
> > 
> 
> 	Yes.  For polled regions, the server returns an FD with each
> message, so there would be an FD leak if I didn’t check that the region
> hadn’t already returned one.  Since I had to cache the FD anyway, I
> cached the whole struct.

If vfio_get_region_info() takes an int *fd argument then fd ownership
becomes explicit and the need for the cache falls away. Maybe Alex has a
preference for how to structure the code to track per-region fds.

Stefan
diff mbox series

Patch

diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 13e44ebf1c..104bf4ff31 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -95,4 +95,18 @@  typedef struct {
     uint32_t cap_offset;
 } VFIOUserDeviceInfo;
 
+/*
+ * VFIO_USER_DEVICE_GET_REGION_INFO
+ * imported from struct_vfio_region_info
+ */
+typedef struct {
+    VFIOUserHdr hdr;
+    uint32_t argsz;
+    uint32_t flags;
+    uint32_t index;
+    uint32_t cap_offset;
+    uint64_t size;
+    uint64_t offset;
+} VFIOUserRegionInfo;
+
 #endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 82044e7e78..f0122539ba 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -72,5 +72,7 @@  void vfio_user_set_reqhandler(VFIODevice *vbasdev,
 void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
 int vfio_user_validate_version(VFIODevice *vbasedev, Error **errp);
 int vfio_user_get_info(VFIODevice *vbasedev);
+int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
+                              struct vfio_region_info *info, VFIOUserFDs *fds);
 
 #endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f43dc6e5d0..bdd25a546c 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -56,6 +56,7 @@  typedef struct VFIORegion {
     uint32_t nr_mmaps;
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
+    int remfd; /* fd if exported from remote process */
 } VFIORegion;
 
 typedef struct VFIOMigration {
@@ -145,6 +146,8 @@  typedef struct VFIODevice {
     Error *migration_blocker;
     OnOffAuto pre_copy_dirty_page_tracking;
     VFIOProxy *proxy;
+    struct vfio_region_info **regions;
+    int *regfds;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 8728d4d5c2..7d667b0533 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -40,6 +40,7 @@ 
 #include "trace.h"
 #include "qapi/error.h"
 #include "migration/migration.h"
+#include "hw/vfio/user.h"
 
 VFIOGroupList vfio_group_list =
     QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -1514,6 +1515,16 @@  bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
     return true;
 }
 
+static int vfio_get_region_info_remfd(VFIODevice *vbasedev, int index)
+{
+    struct vfio_region_info *info;
+
+    if (vbasedev->regions == NULL || vbasedev->regions[index] == NULL) {
+        vfio_get_region_info(vbasedev, index, &info);
+    }
+    return vbasedev->regfds != NULL ? vbasedev->regfds[index] : -1;
+}
+
 static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                           struct vfio_region_info *info)
 {
@@ -1567,6 +1578,7 @@  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
     region->size = info->size;
     region->fd_offset = info->offset;
     region->nr = index;
+    region->remfd = vfio_get_region_info_remfd(vbasedev, index);
 
     if (region->size) {
         region->mem = g_new0(MemoryRegion, 1);
@@ -1610,6 +1622,7 @@  int vfio_region_mmap(VFIORegion *region)
 {
     int i, prot = 0;
     char *name;
+    int fd;
 
     if (!region->mem) {
         return 0;
@@ -1618,9 +1631,11 @@  int vfio_region_mmap(VFIORegion *region)
     prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
     prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
 
+    fd = region->remfd != -1 ? region->remfd : region->vbasedev->fd;
+
     for (i = 0; i < region->nr_mmaps; i++) {
         region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
-                                     MAP_SHARED, region->vbasedev->fd,
+                                     MAP_SHARED, fd,
                                      region->fd_offset +
                                      region->mmaps[i].offset);
         if (region->mmaps[i].mmap == MAP_FAILED) {
@@ -2397,6 +2412,23 @@  int vfio_get_device(VFIOGroup *group, const char *name,
 
 void vfio_put_base_device(VFIODevice *vbasedev)
 {
+    if (vbasedev->regions != NULL) {
+        int i;
+
+        for (i = 0; i < vbasedev->num_regions; i++) {
+            if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) {
+                close(vbasedev->regfds[i]);
+            }
+            g_free(vbasedev->regions[i]);
+        }
+        g_free(vbasedev->regions);
+        vbasedev->regions = NULL;
+        if (vbasedev->regfds != NULL) {
+            g_free(vbasedev->regfds);
+            vbasedev->regfds = NULL;
+        }
+    }
+
     if (!vbasedev->group) {
         return;
     }
@@ -2410,6 +2442,24 @@  int vfio_get_region_info(VFIODevice *vbasedev, int index,
                          struct vfio_region_info **info)
 {
     size_t argsz = sizeof(struct vfio_region_info);
+    int fd = -1;
+    int ret;
+
+    /* create region cache */
+    if (vbasedev->regions == NULL) {
+        vbasedev->regions = g_new0(struct vfio_region_info *,
+                                   vbasedev->num_regions);
+        if (vbasedev->proxy != NULL) {
+            vbasedev->regfds = g_new0(int, vbasedev->num_regions);
+        }
+    }
+    /* check cache */
+    if (vbasedev->regions[index] != NULL) {
+        *info = g_malloc0(vbasedev->regions[index]->argsz);
+        memcpy(*info, vbasedev->regions[index],
+               vbasedev->regions[index]->argsz);
+        return 0;
+    }
 
     *info = g_malloc0(argsz);
 
@@ -2417,7 +2467,17 @@  int vfio_get_region_info(VFIODevice *vbasedev, int index,
 retry:
     (*info)->argsz = argsz;
 
-    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
+    if (vbasedev->proxy != NULL) {
+        VFIOUserFDs fds = { 0, 1, &fd};
+
+        ret = vfio_user_get_region_info(vbasedev, index, *info, &fds);
+    } else {
+        ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info);
+        if (ret < 0) {
+            ret = -errno;
+        }
+    }
+    if (ret != 0) {
         g_free(*info);
         *info = NULL;
         return -errno;
@@ -2426,10 +2486,22 @@  retry:
     if ((*info)->argsz > argsz) {
         argsz = (*info)->argsz;
         *info = g_realloc(*info, argsz);
+        if (fd != -1) {
+            close(fd);
+            fd = -1;
+        }
 
         goto retry;
     }
 
+    /* fill cache */
+    vbasedev->regions[index] = g_malloc0(argsz);
+    memcpy(vbasedev->regions[index], *info, argsz);
+    *vbasedev->regions[index] = **info;
+    if (vbasedev->regfds != NULL) {
+        vbasedev->regfds[index] = fd;
+    }
+
     return 0;
 }
 
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index b584b8e0f2..91b51f37df 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -734,3 +734,36 @@  int vfio_user_get_info(VFIODevice *vbasedev)
     vbasedev->reset_works = !!(msg.flags & VFIO_DEVICE_FLAGS_RESET);
     return 0;
 }
+
+int vfio_user_get_region_info(VFIODevice *vbasedev, int index,
+                              struct vfio_region_info *info, VFIOUserFDs *fds)
+{
+    g_autofree VFIOUserRegionInfo *msgp = NULL;
+    int size;
+
+    /* data returned can be larger than vfio_region_info */
+    if (info->argsz < sizeof(*info)) {
+        error_printf("vfio_user_get_region_info argsz too small\n");
+        return -EINVAL;
+    }
+    if (fds != NULL && fds->send_fds != 0) {
+        error_printf("vfio_user_get_region_info can't send FDs\n");
+        return -EINVAL;
+    }
+
+    size = info->argsz + sizeof(VFIOUserHdr);
+    msgp = g_malloc0(size);
+
+    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+                          sizeof(*msgp), 0);
+    msgp->argsz = info->argsz;
+    msgp->index = info->index;
+
+    vfio_user_send_recv(vbasedev->proxy, &msgp->hdr, fds, size, 0);
+    if (msgp->hdr.flags & VFIO_USER_ERROR) {
+        return -msgp->hdr.error_reply;
+    }
+
+    memcpy(info, &msgp->argsz, info->argsz);
+    return 0;
+}