Message ID | 9-v1-a8faf768d202+125dd-vfio_mdev_no_group_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Make the rest of the VFIO driver interface use vfio_device | expand |
On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote: > + if (WARN_ON(!READ_ONCE(vdev->open_count))) > + return -EINVAL; I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE, otherwise there will be too many backtraces to be useful if a driver ever gets the API wrong. Otherwise looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
On Wed, Apr 13, 2022 at 08:11:05AM +0200, Christoph Hellwig wrote: > On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote: > > + if (WARN_ON(!READ_ONCE(vdev->open_count))) > > + return -EINVAL; > > I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE, > otherwise there will be too many backtraces to be useful if a driver > ever gets the API wrong. Sure, I added a wrapper to make that have less overhead and merged it with the other 'driver is calling this correctly' checks: @@ -1330,6 +1330,12 @@ static int vfio_group_add_container_user(struct vfio_group *group) static const struct file_operations vfio_device_fops; +/* true if the vfio_device has open_device() called but not close_device() */ +static bool vfio_assert_device_open(struct vfio_device *device) +{ + return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); +} + static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) { struct vfio_device *device; @@ -1544,6 +1550,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) struct vfio_device *device = filep->private_data; mutex_lock(&device->dev_set->lock); + vfio_assert_device_open(device); if (!--device->open_count && device->ops->close_device) device->ops->close_device(device); mutex_unlock(&device->dev_set->lock); @@ -2112,7 +2119,7 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage, struct vfio_iommu_driver *driver; int ret; - if (!user_pfn || !phys_pfn || !npage) + if (!user_pfn || !phys_pfn || !npage || !vfio_assert_device_open(vdev)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -2121,9 +2128,6 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage, if (group->dev_counter > 1) return -EINVAL; - if (WARN_ON(!READ_ONCE(vdev->open_count))) - return -EINVAL; - container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) @@ -2153,15 +2157,12 @@ int vfio_unpin_pages(struct 
vfio_device *vdev, unsigned long *user_pfn, struct vfio_iommu_driver *driver; int ret; - if (!user_pfn || !npage) + if (!user_pfn || !npage || !vfio_assert_device_open(vdev)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - if (WARN_ON(!READ_ONCE(vdev->open_count))) - return -EINVAL; - container = vdev->group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->unpin_pages)) @@ -2198,10 +2199,7 @@ int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova, struct vfio_iommu_driver *driver; int ret = 0; - if (!data || len <= 0) - return -EINVAL; - - if (WARN_ON(!READ_ONCE(vdev->open_count))) + if (!data || len <= 0 || !vfio_assert_device_open(vdev)) return -EINVAL; container = vdev->group->container; @@ -2294,10 +2292,7 @@ int vfio_register_notifier(struct vfio_device *dev, enum vfio_notify_type type, struct vfio_group *group = dev->group; int ret; - if (!nb || !events || (*events == 0)) - return -EINVAL; - - if (WARN_ON(!READ_ONCE(dev->open_count))) + if (!nb || !events || (*events == 0) || !vfio_assert_device_open(dev)) return -EINVAL; switch (type) { @@ -2321,10 +2316,7 @@ int vfio_unregister_notifier(struct vfio_device *dev, struct vfio_group *group = dev->group; int ret; - if (!nb) - return -EINVAL; - - if (WARN_ON(!READ_ONCE(dev->open_count))) + if (!nb || !vfio_assert_device_open(dev)) return -EINVAL; switch (type) { Thanks, Jason
On Wed, Apr 13, 2022 at 11:03:05AM -0300, Jason Gunthorpe wrote: > On Wed, Apr 13, 2022 at 08:11:05AM +0200, Christoph Hellwig wrote: > > On Tue, Apr 12, 2022 at 12:53:36PM -0300, Jason Gunthorpe wrote: > > > + if (WARN_ON(!READ_ONCE(vdev->open_count))) > > > + return -EINVAL; > > > > I think all the WARN_ON()s in this patch need to be WARN_ON_ONCE, > > otherwise there will be too many backtraces to be useful if a driver > > ever gets the API wrong. > > Sure, I added a wrapper to make that have less overhead and merged it > with the other 'driver is calling this correctly' checks: Looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
On 4/12/22 11:53 AM, Jason Gunthorpe wrote: > When the open_device() op is called the container_users is incremented and > held incremented until close_device(). Thus, so long as drivers call > functions within their open_device()/close_device() region they do not > need to worry about the container_users. > > These functions can all only be called between > open_device()/close_device(): > > vfio_pin_pages() > vfio_unpin_pages() > vfio_dma_rw() > vfio_register_notifier() > vfio_unregister_notifier() > > So eliminate the calls to vfio_group_add_container_user() and add a simple > WARN_ON to detect mis-use by drivers. > vfio_device_fops_release decrements dev->open_count immediately before calling dev->ops->close_device, which means we could enter close_device with an open_count of 0. Maybe vfio_device_fops_release should handle this the same way as vfio_group_get_device_fd? if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); device->open_count--;
On Thu, Apr 14, 2022 at 09:51:49AM -0400, Matthew Rosato wrote: > On 4/12/22 11:53 AM, Jason Gunthorpe wrote: > > When the open_device() op is called the container_users is incremented and > > held incremented until close_device(). Thus, so long as drivers call > > functions within their open_device()/close_device() region they do not > > need to worry about the container_users. > > > > These functions can all only be called between > > open_device()/close_device(): > > > > vfio_pin_pages() > > vfio_unpin_pages() > > vfio_dma_rw() > > vfio_register_notifier() > > vfio_unregister_notifier() > > > > So eliminate the calls to vfio_group_add_container_user() and add a simple > > WARN_ON to detect mis-use by drivers. > > > > vfio_device_fops_release decrements dev->open_count immediately before > calling dev->ops->close_device, which means we could enter close_device with > a dev_count of 0. > > Maybe vfio_device_fops_release should handle the same way as > vfio_group_get_device_fd? > > if (device->open_count == 1 && device->ops->close_device) > device->ops->close_device(device); > device->open_count--; Yes, thanks a lot! I have nothing to test these flows on... It matches the ordering in the only other place to call close_device. I folded this into the patch: diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 0f735f9f206002..29761f0cf0a227 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1551,8 +1551,9 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); - if (!--device->open_count && device->ops->close_device) + if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); + device->open_count--; mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Thursday, April 14, 2022 10:22 PM > > On Thu, Apr 14, 2022 at 09:51:49AM -0400, Matthew Rosato wrote: > > On 4/12/22 11:53 AM, Jason Gunthorpe wrote: > > > When the open_device() op is called the container_users is incremented > and > > > held incremented until close_device(). Thus, so long as drivers call > > > functions within their open_device()/close_device() region they do not > > > need to worry about the container_users. > > > > > > These functions can all only be called between > > > open_device()/close_device(): > > > > > > vfio_pin_pages() > > > vfio_unpin_pages() > > > vfio_dma_rw() > > > vfio_register_notifier() > > > vfio_unregister_notifier() > > > > > > So eliminate the calls to vfio_group_add_container_user() and add a > simple > > > WARN_ON to detect mis-use by drivers. > > > > > > > vfio_device_fops_release decrements dev->open_count immediately > before > > calling dev->ops->close_device, which means we could enter close_device > with > > a dev_count of 0. > > > > Maybe vfio_device_fops_release should handle the same way as > > vfio_group_get_device_fd? > > > > if (device->open_count == 1 && device->ops->close_device) > > device->ops->close_device(device); > > device->open_count--; > > Yes, thanks alot! I have nothing to test these flows on... > > It matches the ordering in the only other place to call close_device. > > I folded this into the patch: While it's a welcome fix, is it actually related to this series? The point of this patch is that those functions are called when container_users is non-zero. This is true even without this fix given container_users is decremented after calling device->ops->close_device(). iiuc this might be better sent out as a separate fix out of this series? Or at least add a comment in the commit msg about taking the chance to fix an unrelated issue to not cause confusion... Thanks Kevin
On Fri, Apr 15, 2022 at 02:32:08AM +0000, Tian, Kevin wrote: > While it's a welcomed fix is it actually related to this series? The point > of this patch is that those functions are called when container_users > is non-zero. This is true even without this fix given container_users > is decremented after calling device->ops->close_device(). It isn't: open_count is decremented before close_device() is called, which causes it to be 0 when the assertions are called. Jason
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Friday, April 15, 2022 8:07 PM > > On Fri, Apr 15, 2022 at 02:32:08AM +0000, Tian, Kevin wrote: > > > While it's a welcomed fix is it actually related to this series? The point > > of this patch is that those functions are called when container_users > > is non-zero. This is true even without this fix given container_users > > is decremented after calling device->ops->close_device(). > > It isn't, it is decremented before which causes it to be 0 when the > assertions are called. > Right, it's quite obvious when I read it the second time.
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 3d75505bf3cc26..ab0c3f5635905c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2121,9 +2121,8 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage, if (group->dev_counter > 1) return -EINVAL; - ret = vfio_group_add_container_user(group); - if (ret) - return ret; + if (WARN_ON(!READ_ONCE(vdev->open_count))) + return -EINVAL; container = group->container; driver = container->iommu_driver; @@ -2134,8 +2133,6 @@ int vfio_pin_pages(struct vfio_device *vdev, unsigned long *user_pfn, int npage, else ret = -ENOTTY; - vfio_group_try_dissolve_container(group); - return ret; } EXPORT_SYMBOL(vfio_pin_pages); @@ -2162,9 +2159,8 @@ int vfio_unpin_pages(struct vfio_device *vdev, unsigned long *user_pfn, if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - ret = vfio_group_add_container_user(vdev->group); - if (ret) - return ret; + if (WARN_ON(!READ_ONCE(vdev->open_count))) + return -EINVAL; container = vdev->group->container; driver = container->iommu_driver; @@ -2174,8 +2170,6 @@ int vfio_unpin_pages(struct vfio_device *vdev, unsigned long *user_pfn, else ret = -ENOTTY; - vfio_group_try_dissolve_container(vdev->group); - return ret; } EXPORT_SYMBOL(vfio_unpin_pages); @@ -2207,9 +2201,8 @@ int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova, if (!data || len <= 0) return -EINVAL; - ret = vfio_group_add_container_user(vdev->group); - if (ret) - return ret; + if (WARN_ON(!READ_ONCE(vdev->open_count))) + return -EINVAL; container = vdev->group->container; driver = container->iommu_driver; @@ -2219,9 +2212,6 @@ int vfio_dma_rw(struct vfio_device *vdev, dma_addr_t user_iova, user_iova, data, len, write); else ret = -ENOTTY; - - vfio_group_try_dissolve_container(vdev->group); - return ret; } EXPORT_SYMBOL(vfio_dma_rw); @@ -2234,10 +2224,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, struct vfio_iommu_driver *driver; int ret; - ret = 
vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->register_notifier)) @@ -2245,9 +2231,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, events, nb); else ret = -ENOTTY; - - vfio_group_try_dissolve_container(group); - return ret; } @@ -2258,10 +2241,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, struct vfio_iommu_driver *driver; int ret; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->unregister_notifier)) @@ -2269,9 +2248,6 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, nb); else ret = -ENOTTY; - - vfio_group_try_dissolve_container(group); - return ret; } @@ -2300,10 +2276,6 @@ static int vfio_register_group_notifier(struct vfio_group *group, if (*events) return -EINVAL; - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - ret = blocking_notifier_chain_register(&group->notifier, nb); /* @@ -2313,25 +2285,6 @@ static int vfio_register_group_notifier(struct vfio_group *group, if (!ret && set_kvm && group->kvm) blocking_notifier_call_chain(&group->notifier, VFIO_GROUP_NOTIFY_SET_KVM, group->kvm); - - vfio_group_try_dissolve_container(group); - - return ret; -} - -static int vfio_unregister_group_notifier(struct vfio_group *group, - struct notifier_block *nb) -{ - int ret; - - ret = vfio_group_add_container_user(group); - if (ret) - return -EINVAL; - - ret = blocking_notifier_chain_unregister(&group->notifier, nb); - - vfio_group_try_dissolve_container(group); - return ret; } @@ -2344,6 +2297,9 @@ int vfio_register_notifier(struct vfio_device *dev, enum vfio_notify_type type, if (!nb || !events || (*events == 0)) return -EINVAL; + if (WARN_ON(!READ_ONCE(dev->open_count))) + return -EINVAL; + switch (type) { case VFIO_IOMMU_NOTIFY: ret 
= vfio_register_iommu_notifier(group, events, nb); @@ -2368,12 +2324,15 @@ int vfio_unregister_notifier(struct vfio_device *dev, if (!nb) return -EINVAL; + if (WARN_ON(!READ_ONCE(dev->open_count))) + return -EINVAL; + switch (type) { case VFIO_IOMMU_NOTIFY: ret = vfio_unregister_iommu_notifier(group, nb); break; case VFIO_GROUP_NOTIFY: - ret = vfio_unregister_group_notifier(group, nb); + ret = blocking_notifier_chain_unregister(&group->notifier, nb); break; default: ret = -EINVAL;
When the open_device() op is called the container_users is incremented and held incremented until close_device(). Thus, so long as drivers call functions within their open_device()/close_device() region they do not need to worry about the container_users. These functions can all only be called between open_device()/close_device(): vfio_pin_pages() vfio_unpin_pages() vfio_dma_rw() vfio_register_notifier() vfio_unregister_notifier() So eliminate the calls to vfio_group_add_container_user() and add a simple WARN_ON to detect mis-use by drivers. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/vfio/vfio.c | 67 +++++++++------------------------------------ 1 file changed, 13 insertions(+), 54 deletions(-)