Message ID | 20230401144429.88673-6-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce new methods for verifying ownership in vfio PCI hot reset | expand |
Hi Yi, On 4/1/23 16:44, Yi Liu wrote: > as an alternative method for ownership check when iommufd is used. In I don't understand the 1st sentence. > this case all opened devices in the affected dev_set are verified to > be bound to a same valid iommufd value to allow reset. It's simpler > and faster as user does not need to pass a set of fds and kernel no kernel does not need to search > need to search the device within the given fds. > > a device in noiommu mode doesn't have a valid iommufd, so this method > should not be used in a dev_set which contains multiple devices and one > of them is in noiommu. The only allowed noiommu scenario is that the > calling device is noiommu and it's in a singleton dev_set. > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > Tested-by: Yanting Jiang <yanting.jiang@intel.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > --- > drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++----- > include/uapi/linux/vfio.h | 9 ++++++- > 2 files changed, 44 insertions(+), 7 deletions(-) > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > index 3696b8e58445..b68fcba67a4b 100644 > --- a/drivers/vfio/pci/vfio_pci_core.c > +++ b/drivers/vfio/pci/vfio_pci_core.c > @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) > struct vfio_pci_group_info; > static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > - struct vfio_pci_group_info *groups); > + struct vfio_pci_group_info *groups, > + struct iommufd_ctx *iommufd_ctx); > > /* > * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND > @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, > return ret; > > /* Somewhere between 1 and count is OK */ > - if (!hdr->count || hdr->count > count) > + if (hdr->count > count) then I would simply remove the above comment since !count check is done by the caller. > return -EINVAL; > > group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); > @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, > info.count = hdr->count; > info.files = files; > > - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); > + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); > > hot_reset_release: > for (file_idx--; file_idx >= 0; file_idx--) > @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, > { > unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); > struct vfio_pci_hot_reset hdr; > + struct iommufd_ctx *iommufd; > bool slot = false; > > if (copy_from_user(&hdr, arg, minsz)) > @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, > else if (pci_probe_reset_bus(vdev->pdev->bus)) > return -ENODEV; > > - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > + if (hdr.count) > + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > + > + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > + > + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); > } > > static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, > @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, > { > unsigned int i; > > + if (!groups) > + return false; > + > for (i = 0; i < groups->count; i++) > if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > return true; > @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) > return ret; > } > > +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, > + struct iommufd_ctx *iommufd_ctx) > +{ > + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > + > + if (!iommufd) > + return false; > + > + return iommufd == iommufd_ctx; > +} > + > /* > * We need to get memory_lock for each device, but devices can share mmap_lock, > * therefore we need to zap and hold the vma_lock for each device, and only then > * get each memory_lock. > */ > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > - struct vfio_pci_group_info *groups) > + struct vfio_pci_group_info *groups, > + struct iommufd_ctx *iommufd_ctx) > { > struct vfio_pci_core_device *cur_mem; > struct vfio_pci_core_device *cur_vma; > @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > * > * Otherwise all opened devices in the dev_set must be > * contained by the set of groups provided by the user. > + * > + * If user provides a zero-length array, then all the > + * opened devices must be bound to a same iommufd_ctx. > + * > + * If all above checks are failed, reset is allowed only if > + * the calling device is in a singleton dev_set. > */ > if (cur_vma->vdev.open_count && > - !vfio_dev_in_groups(cur_vma, groups)) { > + !vfio_dev_in_groups(cur_vma, groups) && > + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && > + (dev_set->device_count > 1)) { > ret = -EINVAL; > goto err_undo; > } > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index f96e5689cffc..17aa5d09db41 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { > * the calling user must ensure all affected devices, if opened, are > * owned by itself. > * > - * The ownership is proved by an array of group fds. > + * The ownership can be proved by: > + * - An array of group fds > + * - A zero-length array I would suggest something alike in case a non void group fd array is passed, the devices affected by the reset must belong to those opened VFIO groups. in case a zero length array is passed, the other devices affected by the reset, if any, must be bound to the same iommufd as this VFIO device Either of the 2 methods is applied to check the feasibility of the reset > + * > + * In the last case all affected devices which are opened by this user > + * must have been bound to a same iommufd. If the calling device is in > + * noiommu mode (no valid iommufd) then it can be reset only if the reset > + * doesn't affect other devices. and keep that too > * > * Return: 0 on success, -errno on failure. > */ Thanks Eric
On Sat, 1 Apr 2023 07:44:22 -0700 Yi Liu <yi.l.liu@intel.com> wrote: > as an alternative method for ownership check when iommufd is used. In > this case all opened devices in the affected dev_set are verified to > be bound to a same valid iommufd value to allow reset. It's simpler > and faster as user does not need to pass a set of fds and kernel no > need to search the device within the given fds. > > a device in noiommu mode doesn't have a valid iommufd, so this method > should not be used in a dev_set which contains multiple devices and one > of them is in noiommu. The only allowed noiommu scenario is that the > calling device is noiommu and it's in a singleton dev_set. > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > Tested-by: Yanting Jiang <yanting.jiang@intel.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > --- > drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++----- > include/uapi/linux/vfio.h | 9 ++++++- > 2 files changed, 44 insertions(+), 7 deletions(-) > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > index 3696b8e58445..b68fcba67a4b 100644 > --- a/drivers/vfio/pci/vfio_pci_core.c > +++ b/drivers/vfio/pci/vfio_pci_core.c > @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) > struct vfio_pci_group_info; > static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > - struct vfio_pci_group_info *groups); > + struct vfio_pci_group_info *groups, > + struct iommufd_ctx *iommufd_ctx); > > /* > * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND > @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, > return ret; > > /* Somewhere between 1 and count is OK */ > - if (!hdr->count || hdr->count > count) > + if (hdr->count > count) > return -EINVAL; > > group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); > @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, > info.count = hdr->count; > info.files = files; > > - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); > + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); > > hot_reset_release: > for (file_idx--; file_idx >= 0; file_idx--) > @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, > { > unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); > struct vfio_pci_hot_reset hdr; > + struct iommufd_ctx *iommufd; > bool slot = false; > > if (copy_from_user(&hdr, arg, minsz)) > @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, > else if (pci_probe_reset_bus(vdev->pdev->bus)) > return -ENODEV; > > - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > + if (hdr.count) > + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > + > + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > + > + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); > } > > static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, > @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, > { > unsigned int i; > > + if (!groups) > + return false; > + > for (i = 0; i < groups->count; i++) > if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > return true; > @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) > return ret; > } > > +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, > + struct iommufd_ctx *iommufd_ctx) > +{ > + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > + > + if (!iommufd) > + return false; > + > + return iommufd == iommufd_ctx; > +} > + > /* > * We need to get memory_lock for each device, but devices can share mmap_lock, > * therefore we need to zap and hold the vma_lock for each device, and only then > * get each memory_lock. > */ > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > - struct vfio_pci_group_info *groups) > + struct vfio_pci_group_info *groups, > + struct iommufd_ctx *iommufd_ctx) > { > struct vfio_pci_core_device *cur_mem; > struct vfio_pci_core_device *cur_vma; > @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > * > * Otherwise all opened devices in the dev_set must be > * contained by the set of groups provided by the user. > + * > + * If user provides a zero-length array, then all the > + * opened devices must be bound to a same iommufd_ctx. > + * > + * If all above checks are failed, reset is allowed only if > + * the calling device is in a singleton dev_set. > */ > if (cur_vma->vdev.open_count && > - !vfio_dev_in_groups(cur_vma, groups)) { > + !vfio_dev_in_groups(cur_vma, groups) && > + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && > + (dev_set->device_count > 1)) { This last condition looks buggy to me, we need all conditions to be true to generate an error here, which means that for a singleton dev_set, it doesn't matter what group fds are passed, if any, or whether the iommufd context matches. I think in fact this means that the empty array path is equally available for group use cases with a singleton dev_set, but we don't enable it for multiple device dev_sets like we do iommufd. You pointed out a previous issue with hot-reset info and no-iommu where if other affected devices are not bound to vfio-pci the info ioctl returns error. That's handled in the hot-reset ioctl by the fact that all affected devices must be in the dev_set and therefore bound to vfio-pci drivers. So it seems to me that aside from the spurious error because we can't report an iommu group when none exists, and didn't spot it to invent an invalid group for debugging, hot-reset otherwise works with no-iommu just like it does for iommu backed devices. We don't currently require singleton no-iommu dev_sets afaict. I'll also note that if the dev_set is singleton, this suggests that pci_reset_function() can make use of bus reset, so a hot-reset is accessible via VFIO_DEVICE_RESET if the appropriate reset method is selected. Therefore, I think as written, the singleton dev_set hot-reset is enabled for iommufd and (unintentionally?) for the group path, while also negating a requirement for a group fd or that a provided group fd actually matches the device in this latter case. The null-array approach is not however extended to groups for more general use. Additionally, limiting no-iommu hot-reset to singleton dev_sets provides only a marginal functional difference vs VFIO_DEVICE_RESET. Thanks, Alex > ret = -EINVAL; > goto err_undo; > } > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index f96e5689cffc..17aa5d09db41 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { > * the calling user must ensure all affected devices, if opened, are > * owned by itself. > * > - * The ownership is proved by an array of group fds. > + * The ownership can be proved by: > + * - An array of group fds > + * - A zero-length array > + * > + * In the last case all affected devices which are opened by this user > + * must have been bound to a same iommufd. If the calling device is in > + * noiommu mode (no valid iommufd) then it can be reset only if the reset > + * doesn't affect other devices. > * > * Return: 0 on success, -errno on failure. > */
> From: Alex Williamson <alex.williamson@redhat.com> > Sent: Wednesday, April 5, 2023 4:19 AM > > On Sat, 1 Apr 2023 07:44:22 -0700 > Yi Liu <yi.l.liu@intel.com> wrote: > > > as an alternative method for ownership check when iommufd is used. In > > this case all opened devices in the affected dev_set are verified to > > be bound to a same valid iommufd value to allow reset. It's simpler > > and faster as user does not need to pass a set of fds and kernel no > > need to search the device within the given fds. > > > > a device in noiommu mode doesn't have a valid iommufd, so this method > > should not be used in a dev_set which contains multiple devices and one > > of them is in noiommu. The only allowed noiommu scenario is that the > > calling device is noiommu and it's in a singleton dev_set. > > > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > > Tested-by: Yanting Jiang <yanting.jiang@intel.com> > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > > --- > > drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++----- > > include/uapi/linux/vfio.h | 9 ++++++- > > 2 files changed, 44 insertions(+), 7 deletions(-) > > > > diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > > index 3696b8e58445..b68fcba67a4b 100644 > > --- a/drivers/vfio/pci/vfio_pci_core.c > > +++ b/drivers/vfio/pci/vfio_pci_core.c > > @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct > vfio_pci_core_device *vdev) > > struct vfio_pci_group_info; > > static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); > > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > > - struct vfio_pci_group_info *groups); > > + struct vfio_pci_group_info *groups, > > + struct iommufd_ctx *iommufd_ctx); > > > > /* > > * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND > > @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > > return ret; > > > > /* Somewhere between 1 and count is OK */ > > - if (!hdr->count || hdr->count > count) > > + if (hdr->count > count) > > return -EINVAL; > > > > group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); > > @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > > info.count = hdr->count; > > info.files = files; > > > > - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); > > + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); > > > > hot_reset_release: > > for (file_idx--; file_idx >= 0; file_idx--) > > @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > > { > > unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); > > struct vfio_pci_hot_reset hdr; > > + struct iommufd_ctx *iommufd; > > bool slot = false; > > > > if (copy_from_user(&hdr, arg, minsz)) > > @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > > else if (pci_probe_reset_bus(vdev->pdev->bus)) > > return -ENODEV; > > > > - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > > + if (hdr.count) > > + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > > + > > + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > > + > > + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); > > } > > > > static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, > > @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct > vfio_pci_core_device *vdev, > > { > > unsigned int i; > > > > + if (!groups) > > + return false; > > + > > for (i = 0; i < groups->count; i++) > > if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > > return true; > > @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct > vfio_device_set *dev_set) > > return ret; > > } > > > > +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, > > + struct iommufd_ctx *iommufd_ctx) > > +{ > > + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > > + > > + if (!iommufd) > > + return false; > > + > > + return iommufd == iommufd_ctx; > > +} > > + > > /* > > * We need to get memory_lock for each device, but devices can share mmap_lock, > > * therefore we need to zap and hold the vma_lock for each device, and only then > > * get each memory_lock. > > */ > > static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > > - struct vfio_pci_group_info *groups) > > + struct vfio_pci_group_info *groups, > > + struct iommufd_ctx *iommufd_ctx) > > { > > struct vfio_pci_core_device *cur_mem; > > struct vfio_pci_core_device *cur_vma; > > @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct > vfio_device_set *dev_set, > > * > > * Otherwise all opened devices in the dev_set must be > > * contained by the set of groups provided by the user. > > + * > > + * If user provides a zero-length array, then all the > > + * opened devices must be bound to a same iommufd_ctx. > > + * > > + * If all above checks are failed, reset is allowed only if > > + * the calling device is in a singleton dev_set. > > */ > > if (cur_vma->vdev.open_count && > > - !vfio_dev_in_groups(cur_vma, groups)) { > > + !vfio_dev_in_groups(cur_vma, groups) && > > + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && > > + (dev_set->device_count > 1)) { > > This last condition looks buggy to me, we need all conditions to be > true to generate an error here, which means that for a singleton > dev_set, it doesn't matter what group fds are passed, if any, or whether > the iommufd context matches. I think in fact this means that the empty > array path is equally available for group use cases with a singleton > dev_set, but we don't enable it for multiple device dev_sets like we do > iommufd. you are right. The last condition allows the empty-fd array path to work for the group use case if the dev_set happens to be a singleton. > > You pointed out a previous issue with hot-reset info and no-iommu where > if other affected devices are not bound to vfio-pci the info ioctl > returns error. That's handled in the hot-reset ioctl by the fact that > all affected devices must be in the dev_set and therefore bound to > vfio-pci drivers. yes, hot-reset ioctl requires all affected devices listed in the dev_set. So for the case there are devices not bound to vfio yet, hot-reset ioctl just fails. If all affected devices are in the dev_set, they will have a fake group allocated by vfio. So the info ioctl won't fail. > So it seems to me that aside from the spurious error > because we can't report an iommu group when none exists, and didn't > spot it to invent an invalid group for debugging, hot-reset otherwise > works with no-iommu just like it does for iommu backed devices. We > don't currently require singleton no-iommu dev_sets afaict. yes. the requirement for hot-reset is the same between no-iommu and the iommufd backed devices. > I'll also note that if the dev_set is singleton, this suggests that > pci_reset_function() can make use of bus reset, so a hot-reset is > accessible via VFIO_DEVICE_RESET if the appropriate reset method is > selected. yes. so does it mean not necessary to allow singleton dev_set support in hot-reset ioctl? If user uses hot-reset, it should because of unable to use VFIO_DEVICE_RESET, is it? > > Therefore, I think as written, the singleton dev_set hot-reset is > enabled for iommufd and (unintentionally?) for the group path, while > also negating a requirement for a group fd or that a provided group fd > actually matches the device in this latter case. The null-array > approach is not however extended to groups for more general use. > Additionally, limiting no-iommu hot-reset to singleton dev_sets > provides only a marginal functional difference vs VFIO_DEVICE_RESET. I think the singletion dev_set hot-reset is for iommufd (or more accurately for the noiommu case in cdev path). > Thanks, > > Alex > > > ret = -EINVAL; > > goto err_undo; > > } > > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > > index f96e5689cffc..17aa5d09db41 100644 > > --- a/include/uapi/linux/vfio.h > > +++ b/include/uapi/linux/vfio.h > > @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { > > * the calling user must ensure all affected devices, if opened, are > > * owned by itself. > > * > > - * The ownership is proved by an array of group fds. > > + * The ownership can be proved by: > > + * - An array of group fds > > + * - A zero-length array > > + * > > + * In the last case all affected devices which are opened by this user > > + * must have been bound to a same iommufd. If the calling device is in > > + * noiommu mode (no valid iommufd) then it can be reset only if the reset > > + * doesn't affect other devices. > > * > > * Return: 0 on success, -errno on failure. > > */
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Wednesday, April 5, 2023 3:55 PM > > > > Therefore, I think as written, the singleton dev_set hot-reset is > > enabled for iommufd and (unintentionally?) for the group path, while > > also negating a requirement for a group fd or that a provided group fd > > actually matches the device in this latter case. The null-array > > approach is not however extended to groups for more general use. > > Additionally, limiting no-iommu hot-reset to singleton dev_sets > > provides only a marginal functional difference vs VFIO_DEVICE_RESET. > > I think the singletion dev_set hot-reset is for iommufd (or more accurately > for the noiommu case in cdev path). but actually, singleton dev_set hot-reset can work for group path as well. Based on this, I'm also wondering do we really want to have singleton dev_set hot-reset only for cdev noiommu case? or we allow it generally or just don't support it as it is equivalent with VFIO_DEVICE_RESET? If we don't support singletion dev_set hot-reset, noiommu devices in cdev path shall fail the hot-reset if empty-fd array is provided. But we may just document that empty-fd array does not work for noiommu. User should use the device fd array. Regards, Yi Liu
On 4/4/23 22:18, Alex Williamson wrote: > On Sat, 1 Apr 2023 07:44:22 -0700 > Yi Liu <yi.l.liu@intel.com> wrote: > >> as an alternative method for ownership check when iommufd is used. In >> this case all opened devices in the affected dev_set are verified to >> be bound to a same valid iommufd value to allow reset. It's simpler >> and faster as user does not need to pass a set of fds and kernel no >> need to search the device within the given fds. >> >> a device in noiommu mode doesn't have a valid iommufd, so this method >> should not be used in a dev_set which contains multiple devices and one >> of them is in noiommu. The only allowed noiommu scenario is that the >> calling device is noiommu and it's in a singleton dev_set. >> >> Suggested-by: Jason Gunthorpe <jgg@nvidia.com> >> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> >> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> >> Tested-by: Yanting Jiang <yanting.jiang@intel.com> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> --- >> drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++----- >> include/uapi/linux/vfio.h | 9 ++++++- >> 2 files changed, 44 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c >> index 3696b8e58445..b68fcba67a4b 100644 >> --- a/drivers/vfio/pci/vfio_pci_core.c >> +++ b/drivers/vfio/pci/vfio_pci_core.c >> @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) >> struct vfio_pci_group_info; >> static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); >> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, >> - struct vfio_pci_group_info *groups); >> + struct vfio_pci_group_info *groups, >> + struct iommufd_ctx *iommufd_ctx); >> >> /* >> * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND >> @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, >> return ret; >> >> /* Somewhere between 1 and count is OK */ >> - if (!hdr->count || hdr->count > count) >> + if (hdr->count > count) >> return -EINVAL; >> >> group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); >> @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, >> info.count = hdr->count; >> info.files = files; >> >> - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); >> + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); >> >> hot_reset_release: >> for (file_idx--; file_idx >= 0; file_idx--) >> @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, >> { >> unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); >> struct vfio_pci_hot_reset hdr; >> + struct iommufd_ctx *iommufd; >> bool slot = false; >> >> if (copy_from_user(&hdr, arg, minsz)) >> @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, >> else if (pci_probe_reset_bus(vdev->pdev->bus)) >> return -ENODEV; >> >> - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); >> + if (hdr.count) >> + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); >> + >> + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); >> + >> + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); >> } >> >> static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, >> @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, >> { >> unsigned int i; >> >> + if (!groups) >> + return false; >> + >> for (i = 0; i < groups->count; i++) >> if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) >> return true; >> @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) >> return ret; >> } >> >> +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, >> + struct iommufd_ctx *iommufd_ctx) >> +{ >> + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); >> + >> + if (!iommufd) >> + return false; >> + >> + return iommufd == iommufd_ctx; >> +} >> + >> /* >> * We need to get memory_lock for each device, but devices can share mmap_lock, >> * therefore we need to zap and hold the vma_lock for each device, and only then >> * get each memory_lock. >> */ >> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, >> - struct vfio_pci_group_info *groups) >> + struct vfio_pci_group_info *groups, >> + struct iommufd_ctx *iommufd_ctx) >> { >> struct vfio_pci_core_device *cur_mem; >> struct vfio_pci_core_device *cur_vma; >> @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, >> * >> * Otherwise all opened devices in the dev_set must be >> * contained by the set of groups provided by the user. >> + * >> + * If user provides a zero-length array, then all the >> + * opened devices must be bound to a same iommufd_ctx. >> + * >> + * If all above checks are failed, reset is allowed only if >> + * the calling device is in a singleton dev_set. >> */ >> if (cur_vma->vdev.open_count && >> - !vfio_dev_in_groups(cur_vma, groups)) { >> + !vfio_dev_in_groups(cur_vma, groups) && >> + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && >> + (dev_set->device_count > 1)) { > This last condition looks buggy to me, we need all conditions to be > true to generate an error here, which means that for a singleton > dev_set, it doesn't matter what group fds are passed, if any, or whether > the iommufd context matches. I think in fact this means that the empty > array path is equally available for group use cases with a singleton > dev_set, but we don't enable it for multiple device dev_sets like we do > iommufd. > > You pointed out a previous issue with hot-reset info and no-iommu where > if other affected devices are not bound to vfio-pci the info ioctl > returns error. That's handled in the hot-reset ioctl by the fact that > all affected devices must be in the dev_set and therefore bound to > vfio-pci drivers. So it seems to me that aside from the spurious error > because we can't report an iommu group when none exists, and didn't > spot it to invent an invalid group for debugging, hot-reset otherwise > works with no-iommu just like it does for iommu backed devices. We > don't currently require singleton no-iommu dev_sets afaict. > > I'll also note that if the dev_set is singleton, this suggests that > pci_reset_function() can make use of bus reset, so a hot-reset is > accessible via VFIO_DEVICE_RESET if the appropriate reset method is > selected. > > Therefore, I think as written, the singleton dev_set hot-reset is > enabled for iommufd and (unintentionally?) for the group path, while > also negating a requirement for a group fd or that a provided group fd > actually matches the device in this latter case. The null-array > approach is not however extended to groups for more general use. > Additionally, limiting no-iommu hot-reset to singleton dev_sets > provides only a marginal functional difference vs VFIO_DEVICE_RESET. > Thanks, > > Alex What bout introducing a helper static bool is_reset_ok(pdev, groups, ctx) { if (!pdev->vdev.open_count) return true; if (groups && vfio_dev_in_groups(pdev, groups)) return true; if (ctx && vfio_dev_in_iommufd_ctx(pdev, ctx) return true; return false; } Assuming the above logic is correct I think this would make the code more readable Thanks Eric >> ret = -EINVAL; >> goto err_undo; >> } >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h >> index f96e5689cffc..17aa5d09db41 100644 >> --- a/include/uapi/linux/vfio.h >> +++ b/include/uapi/linux/vfio.h >> @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { >> * the calling user must ensure all affected devices, if opened, are >> * owned by itself. >> * >> - * The ownership is proved by an array of group fds. >> + * The ownership can be proved by: >> + * - An array of group fds >> + * - A zero-length array >> + * >> + * In the last case all affected devices which are opened by this user >> + * must have been bound to a same iommufd. If the calling device is in >> + * noiommu mode (no valid iommufd) then it can be reset only if the reset >> + * doesn't affect other devices. >> * >> * Return: 0 on success, -errno on failure. >> */
Hi Eric, > From: Eric Auger <eric.auger@redhat.com> > Sent: Wednesday, April 5, 2023 4:02 PM > > On 4/4/23 22:18, Alex Williamson wrote: > > On Sat, 1 Apr 2023 07:44:22 -0700 > > Yi Liu <yi.l.liu@intel.com> wrote: > > > >> as an alternative method for ownership check when iommufd is used. In > >> this case all opened devices in the affected dev_set are verified to > >> be bound to a same valid iommufd value to allow reset. It's simpler > >> and faster as user does not need to pass a set of fds and kernel no > >> need to search the device within the given fds. > >> > >> a device in noiommu mode doesn't have a valid iommufd, so this method > >> should not be used in a dev_set which contains multiple devices and one > >> of them is in noiommu. The only allowed noiommu scenario is that the > >> calling device is noiommu and it's in a singleton dev_set. > >> > >> Suggested-by: Jason Gunthorpe <jgg@nvidia.com> > >> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > >> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> > >> Tested-by: Yanting Jiang <yanting.jiang@intel.com> > >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> > >> --- > >> drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++----- > >> include/uapi/linux/vfio.h | 9 ++++++- > >> 2 files changed, 44 insertions(+), 7 deletions(-) > >> > >> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c > >> index 3696b8e58445..b68fcba67a4b 100644 > >> --- a/drivers/vfio/pci/vfio_pci_core.c > >> +++ b/drivers/vfio/pci/vfio_pci_core.c > >> @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct > vfio_pci_core_device *vdev) > >> struct vfio_pci_group_info; > >> static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); > >> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > >> - struct vfio_pci_group_info *groups); > >> + struct vfio_pci_group_info *groups, > >> + struct iommufd_ctx *iommufd_ctx); > >> > >> /* > >> * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND > >> @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > >> return ret; > >> > >> /* Somewhere between 1 and count is OK */ > >> - if (!hdr->count || hdr->count > count) > >> + if (hdr->count > count) > >> return -EINVAL; > >> > >> group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); > >> @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct > vfio_pci_core_device *vdev, > >> info.count = hdr->count; > >> info.files = files; > >> > >> - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); > >> + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); > >> > >> hot_reset_release: > >> for (file_idx--; file_idx >= 0; file_idx--) > >> @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > >> { > >> unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); > >> struct vfio_pci_hot_reset hdr; > >> + struct iommufd_ctx *iommufd; > >> bool slot = false; > >> > >> if (copy_from_user(&hdr, arg, minsz)) > >> @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct > vfio_pci_core_device *vdev, > >> else if (pci_probe_reset_bus(vdev->pdev->bus)) > >> return -ENODEV; > >> > >> - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > >> + if (hdr.count) > >> + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); > >> + > >> + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > >> + > >> + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); > >> } > >> > >> static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, > >> @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct > vfio_pci_core_device *vdev, > >> { > >> unsigned int i; > >> > >> + if (!groups) > >> + return false; > >> + > >> for (i = 0; i < groups->count; i++) > >> if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) > >> return true; > >> @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct > vfio_device_set *dev_set) > >> return ret; > >> } > >> > >> +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, > >> + struct iommufd_ctx *iommufd_ctx) > >> +{ > >> + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); > >> + > >> + if (!iommufd) > >> + return false; > >> + > >> + return iommufd == iommufd_ctx; > >> +} > >> + > >> /* > >> * We need to get memory_lock for each device, but devices can share mmap_lock, > >> * therefore we need to zap and hold the vma_lock for each device, and only then > >> * get each memory_lock. > >> */ > >> static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, > >> - struct vfio_pci_group_info *groups) > >> + struct vfio_pci_group_info *groups, > >> + struct iommufd_ctx *iommufd_ctx) > >> { > >> struct vfio_pci_core_device *cur_mem; > >> struct vfio_pci_core_device *cur_vma; > >> @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct > vfio_device_set *dev_set, > >> * > >> * Otherwise all opened devices in the dev_set must be > >> * contained by the set of groups provided by the user. > >> + * > >> + * If user provides a zero-length array, then all the > >> + * opened devices must be bound to a same iommufd_ctx. > >> + * > >> + * If all above checks are failed, reset is allowed only if > >> + * the calling device is in a singleton dev_set. > >> */ > >> if (cur_vma->vdev.open_count && > >> - !vfio_dev_in_groups(cur_vma, groups)) { > >> + !vfio_dev_in_groups(cur_vma, groups) && > >> + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && > >> + (dev_set->device_count > 1)) { > > This last condition looks buggy to me, we need all conditions to be > > true to generate an error here, which means that for a singleton > > dev_set, it doesn't matter what group fds are passed, if any, or whether > > the iommufd context matches. I think in fact this means that the empty > > array path is equally available for group use cases with a singleton > > dev_set, but we don't enable it for multiple device dev_sets like we do > > iommufd. > > > > You pointed out a previous issue with hot-reset info and no-iommu where > > if other affected devices are not bound to vfio-pci the info ioctl > > returns error. That's handled in the hot-reset ioctl by the fact that > > all affected devices must be in the dev_set and therefore bound to > > vfio-pci drivers. So it seems to me that aside from the spurious error > > because we can't report an iommu group when none exists, and didn't > > spot it to invent an invalid group for debugging, hot-reset otherwise > > works with no-iommu just like it does for iommu backed devices. We > > don't currently require singleton no-iommu dev_sets afaict. > > > > I'll also note that if the dev_set is singleton, this suggests that > > pci_reset_function() can make use of bus reset, so a hot-reset is > > accessible via VFIO_DEVICE_RESET if the appropriate reset method is > > selected. > > > > Therefore, I think as written, the singleton dev_set hot-reset is > > enabled for iommufd and (unintentionally?) for the group path, while > > also negating a requirement for a group fd or that a provided group fd > > actually matches the device in this latter case. The null-array > > approach is not however extended to groups for more general use. > > Additionally, limiting no-iommu hot-reset to singleton dev_sets > > provides only a marginal functional difference vs VFIO_DEVICE_RESET. > > Thanks, > > > > Alex > What bout introducing a helper > static bool is_reset_ok(pdev, groups, ctx) { > if (!pdev->vdev.open_count) > return true; > if (groups && vfio_dev_in_groups(pdev, groups)) > return true; > if (ctx && vfio_dev_in_iommufd_ctx(pdev, ctx) > return true; > return false; > } > > Assuming the above logic is correct I think this would make the code > more readable this logic may fail the noiommu devices in the cdev path as the cdev path binds the devices to iommufd==-1. The ctx would be NULL. So we agreed to allow the reset if the dev_set is sigletion. Detail can be found in below paragraph. As I replied in another email. Maybe this singleton support can be dropped since singleton dev_set may just do reset with VFIO_DEVICE_RESET. Alex may correct me if userspace is not so intelligent. "However the iommufd method has difficulty working with noiommu devices since those devices don't have a valid iommufd, unless the noiommu device is in a singleton dev_set hence no ownership check is required. [3] [3] https://lore.kernel.org/kvm/ZACX+Np%2FIY7ygqL5@nvidia.com/" Regards, Yi Liu > Thanks > > Eric > >> ret = -EINVAL; > >> goto err_undo; > >> } > >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > >> index f96e5689cffc..17aa5d09db41 100644 > >> --- a/include/uapi/linux/vfio.h > >> +++ b/include/uapi/linux/vfio.h > >> @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { > >> * the calling user must ensure all affected devices, if opened, are > >> * owned by itself. > >> * > >> - * The ownership is proved by an array of group fds. > >> + * The ownership can be proved by: > >> + * - An array of group fds > >> + * - A zero-length array > >> + * > >> + * In the last case all affected devices which are opened by this user > >> + * must have been bound to a same iommufd. If the calling device is in > >> + * noiommu mode (no valid iommufd) then it can be reset only if the reset > >> + * doesn't affect other devices. > >> * > >> * Return: 0 on success, -errno on failure. > >> */
On Wed, 5 Apr 2023 08:01:49 +0000 "Liu, Yi L" <yi.l.liu@intel.com> wrote: > > From: Liu, Yi L <yi.l.liu@intel.com> > > Sent: Wednesday, April 5, 2023 3:55 PM > > > > > > > Therefore, I think as written, the singleton dev_set hot-reset is > > > enabled for iommufd and (unintentionally?) for the group path, while > > > also negating a requirement for a group fd or that a provided group fd > > > actually matches the device in this latter case. The null-array > > > approach is not however extended to groups for more general use. > > > Additionally, limiting no-iommu hot-reset to singleton dev_sets > > > provides only a marginal functional difference vs VFIO_DEVICE_RESET. > > > > I think the singletion dev_set hot-reset is for iommufd (or more accurately > > for the noiommu case in cdev path). > > but actually, singleton dev_set hot-reset can work for group path as well. > Based on this, I'm also wondering do we really want to have singleton dev_set > hot-reset only for cdev noiommu case? or we allow it generally or just > don't support it as it is equivalent with VFIO_DEVICE_RESET? I think you're taking the potential that VFIO_DEVICE_RESET and hot-reset could do the same thing too far. The former is more likely to do an FLR, or even a PM reset. QEMU even tries to guess what reset VFIO_DEVICE_RESET might use in order to choose to do a hot-reset if it seems like the device might only support a PM reset otherwise. Changing the reset method of a device requires privilege, which is maybe something we'd compromise on for no-iommu, but the general expectation is that VFIO_DEVICE_RESET provides a device level scope and hot-reset provides a... hot-reset, and sometimes those are the same thing, but that doesn't mean we can lean on the former. > If we don't support singletion dev_set hot-reset, noiommu devices in cdev > path shall fail the hot-reset if empty-fd array is provided. But we may just > document that empty-fd array does not work for noiommu. User should > use the device fd array. I don't see any replies to my comment on 08/12 where I again question why we need an empty array option. It's causing all sorts of headaches and I don't see the justification for it beyond some hand waving that it reduces complexity for the user. This singleton dev-set notion seems equally unjustified. Do we just need to deal with hot-reset being unsupported for no-iommu devices with iommufd? Thanks, Alex
On Wed, Apr 05, 2023 at 09:36:46AM -0600, Alex Williamson wrote: > > If we don't support singletion dev_set hot-reset, noiommu devices in cdev > > path shall fail the hot-reset if empty-fd array is provided. But we may just > > document that empty-fd array does not work for noiommu. User should > > use the device fd array. > > I don't see any replies to my comment on 08/12 where I again question > why we need an empty array option. I was pressing we'd do empty-fd only and not do the device fd array at all since it is such an ugly fit for the use cases we have. But it is such a minor detail if you don't want it then take it out. > This singleton dev-set notion seems equally unjustified. Do we just > need to deal with hot-reset being unsupported for no-iommu devices > with iommufd? It was to support no-iommu, if you want to de-support it then it can go away too. AFAIK dpdk doesn't use this feature and it is the only user we know of that has support for no-iommu so it is probably safe. Jason
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 3696b8e58445..b68fcba67a4b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_core_device *vdev) struct vfio_pci_group_info; static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set); static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, - struct vfio_pci_group_info *groups); + struct vfio_pci_group_info *groups, + struct iommufd_ctx *iommufd_ctx); /* * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, return ret; /* Somewhere between 1 and count is OK */ - if (!hdr->count || hdr->count > count) + if (hdr->count > count) return -EINVAL; group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL); @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct vfio_pci_core_device *vdev, info.count = hdr->count; info.files = files; - ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info); + ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL); hot_reset_release: for (file_idx--; file_idx >= 0; file_idx--) @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, { unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count); struct vfio_pci_hot_reset hdr; + struct iommufd_ctx *iommufd; bool slot = false; if (copy_from_user(&hdr, arg, minsz)) @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev, else if (pci_probe_reset_bus(vdev->pdev->bus)) return -ENODEV; - return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); + if (hdr.count) + return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg); + + iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); + + return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd); } static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev, @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct vfio_pci_core_device *vdev, { unsigned int i; + if (!groups) + return false; + for (i = 0; i < groups->count; i++) if (vfio_file_has_dev(groups->files[i], &vdev->vdev)) return true; @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set) return ret; } +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev, + struct iommufd_ctx *iommufd_ctx) +{ + struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev); + + if (!iommufd) + return false; + + return iommufd == iommufd_ctx; +} + /* * We need to get memory_lock for each device, but devices can share mmap_lock, * therefore we need to zap and hold the vma_lock for each device, and only then * get each memory_lock. */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, - struct vfio_pci_group_info *groups) + struct vfio_pci_group_info *groups, + struct iommufd_ctx *iommufd_ctx) { struct vfio_pci_core_device *cur_mem; struct vfio_pci_core_device *cur_vma; @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * * Otherwise all opened devices in the dev_set must be * contained by the set of groups provided by the user. + * + * If user provides a zero-length array, then all the + * opened devices must be bound to a same iommufd_ctx. + * + * If all above checks are failed, reset is allowed only if + * the calling device is in a singleton dev_set. */ if (cur_vma->vdev.open_count && - !vfio_dev_in_groups(cur_vma, groups)) { + !vfio_dev_in_groups(cur_vma, groups) && + !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) && + (dev_set->device_count > 1)) { ret = -EINVAL; goto err_undo; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index f96e5689cffc..17aa5d09db41 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info { * the calling user must ensure all affected devices, if opened, are * owned by itself. * - * The ownership is proved by an array of group fds. + * The ownership can be proved by: + * - An array of group fds + * - A zero-length array + * + * In the last case all affected devices which are opened by this user + * must have been bound to a same iommufd. If the calling device is in + * noiommu mode (no valid iommufd) then it can be reset only if the reset + * doesn't affect other devices. * * Return: 0 on success, -errno on failure. */