Message ID | 1477517366-27871-11-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 10/27/2016 05:29 AM, Kirti Wankhede wrote: > Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers > about DMA_UNMAP. > Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). > Vendor driver should register notifer using these APIs. > Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate > mappings. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Signed-off-by: Neo Jia <cjia@nvidia.com> > Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 > --- > drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ > drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- > include/linux/vfio.h | 11 +++++ > 3 files changed, 163 insertions(+), 10 deletions(-) > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 28b50ca14c52..ff05ac6b1e90 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -1891,6 +1891,79 @@ err_unpin_pages: > } > EXPORT_SYMBOL(vfio_unpin_pages); > > +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) > +{ > + struct vfio_container *container; > + struct vfio_group *group; > + struct vfio_iommu_driver *driver; > + ssize_t ret; > + > + if (!dev || !nb) > + return -EINVAL; > + > + group = vfio_group_get_from_dev(dev); > + if (IS_ERR(group)) > + return PTR_ERR(group); > + > + ret = vfio_group_add_container_user(group); > + if (ret) > + goto err_register_nb; > + > + container = group->container; > + down_read(&container->group_lock); > + > + driver = container->iommu_driver; > + if (likely(driver && driver->ops->register_notifier)) > + ret = driver->ops->register_notifier(container->iommu_data, nb); > + else > + ret = -EINVAL; > + > + up_read(&container->group_lock); > + vfio_group_try_dissolve_container(group); > + > +err_register_nb: > + vfio_group_put(group); > + return ret; > +} > +EXPORT_SYMBOL(vfio_register_notifier); > + > +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) > +{ > + 
struct vfio_container *container; > + struct vfio_group *group; > + struct vfio_iommu_driver *driver; > + ssize_t ret; > + > + if (!dev || !nb) > + return -EINVAL; > + > + group = vfio_group_get_from_dev(dev); > + if (IS_ERR(group)) > + return PTR_ERR(group); > + > + ret = vfio_group_add_container_user(group); > + if (ret) > + goto err_unregister_nb; > + > + container = group->container; > + down_read(&container->group_lock); > + > + driver = container->iommu_driver; > + if (likely(driver && driver->ops->unregister_notifier)) > + ret = driver->ops->unregister_notifier(container->iommu_data, > + nb); > + else > + ret = -EINVAL; > + > + up_read(&container->group_lock); > + vfio_group_try_dissolve_container(group); > + > +err_unregister_nb: > + vfio_group_put(group); > + return ret; > +} > +EXPORT_SYMBOL(vfio_unregister_notifier); > + > /** > * Module/class support > */ > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 5add11a147e1..a4bd331ac0fd 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -37,6 +37,7 @@ > #include <linux/vfio.h> > #include <linux/workqueue.h> > #include <linux/mdev.h> > +#include <linux/notifier.h> > > #define DRIVER_VERSION "0.2" > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > @@ -59,6 +60,7 @@ struct vfio_iommu { > struct vfio_domain *external_domain; /* domain for external user */ > struct mutex lock; > struct rb_root dma_list; > + struct blocking_notifier_head notifier; > bool v2; > bool nesting; > }; > @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, > > mutex_lock(&iommu->lock); > > - if (!iommu->external_domain) { > + /* Fail if notifier list is empty */ > + if ((!iommu->external_domain) || (!iommu->notifier.head)) { > ret = -EINVAL; > goto pin_done; > } > @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) > return bitmap; > } > > +/* > + * This function finds pfn in 
domain->external_addr_space->pfn_list for given > + * iova range. If pfn exist, notify pfn to registered notifier list. On > + * receiving notifier callback, vendor driver should invalidate the mapping and > + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > + * searching for next vfio_pfn in rb tree, start search from first node again. > + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > + * from rb tree and so in next search vfio_pfn would be same as previous > + * vfio_pfn. In that case, exit from loop. > + */ > +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > + struct vfio_iommu_type1_dma_unmap *unmap) > +{ > + struct vfio_domain *domain = iommu->external_domain; > + struct rb_node *n; > + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > + > + do { > + prev_vpfn = vpfn; > + mutex_lock(&domain->external_addr_space->pfn_list_lock); > + > + n = rb_first(&domain->external_addr_space->pfn_list); > + > + for (; n; n = rb_next(n), vpfn = NULL) { > + vpfn = rb_entry(n, struct vfio_pfn, node); > + > + if ((vpfn->iova >= unmap->iova) && > + (vpfn->iova < unmap->iova + unmap->size)) > + break; > + } > + > + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > + > + /* Notify any listeners about DMA_UNMAP */ > + if (vpfn) > + blocking_notifier_call_chain(&iommu->notifier, > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > + &vpfn->pfn); Hi Kirti, The information carried by notifier is only a pfn. Since your pin/unpin interfaces design, it's the vendor driver who should guarantee pin/unpin same times. To achieve that, the vendor driver must cache it's iova->pfn mapping on its side, to avoid pinning a same page for multiple times. With the notifier carrying only a pfn, to find the iova by this pfn, the vendor driver must *also* keep a reverse-mapping. That's a bit too much. 
Since the vendor could also suffer from IOMMU-compatible problem, which means a local cache is always helpful, so I'd like to have the iova carried to the notifier. What'd you say? -- Thanks, Jike > + } while (vpfn && (prev_vpfn != vpfn)); > + > + WARN_ON(vpfn); > +} > + > static int vfio_dma_do_unmap(struct vfio_iommu *iommu, > struct vfio_iommu_type1_dma_unmap *unmap) > { > @@ -844,6 +891,9 @@ unlock: > /* Report how much was unmapped */ > unmap->size = unmapped; > > + if (unmapped && iommu->external_domain) > + vfio_notifier_call_chain(iommu, unmap); > + > return ret; > } > > @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) > INIT_LIST_HEAD(&iommu->domain_list); > iommu->dma_list = RB_ROOT; > mutex_init(&iommu->lock); > + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); > > return iommu; > } > @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > return -ENOTTY; > } > > +static int vfio_iommu_type1_register_notifier(void *iommu_data, > + struct notifier_block *nb) > +{ > + struct vfio_iommu *iommu = iommu_data; > + > + return blocking_notifier_chain_register(&iommu->notifier, nb); > +} > + > +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, > + struct notifier_block *nb) > +{ > + struct vfio_iommu *iommu = iommu_data; > + > + return blocking_notifier_chain_unregister(&iommu->notifier, nb); > +} > + > static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { > - .name = "vfio-iommu-type1", > - .owner = THIS_MODULE, > - .open = vfio_iommu_type1_open, > - .release = vfio_iommu_type1_release, > - .ioctl = vfio_iommu_type1_ioctl, > - .attach_group = vfio_iommu_type1_attach_group, > - .detach_group = vfio_iommu_type1_detach_group, > - .pin_pages = vfio_iommu_type1_pin_pages, > - .unpin_pages = vfio_iommu_type1_unpin_pages, > + .name = "vfio-iommu-type1", > + .owner = THIS_MODULE, > + .open = vfio_iommu_type1_open, > + .release = vfio_iommu_type1_release, > + .ioctl = 
vfio_iommu_type1_ioctl, > + .attach_group = vfio_iommu_type1_attach_group, > + .detach_group = vfio_iommu_type1_detach_group, > + .pin_pages = vfio_iommu_type1_pin_pages, > + .unpin_pages = vfio_iommu_type1_unpin_pages, > + .register_notifier = vfio_iommu_type1_register_notifier, > + .unregister_notifier = vfio_iommu_type1_unregister_notifier, > }; > > static int __init vfio_iommu_type1_init(void) > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index 0609a2052846..4c91ce8bfaeb 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { > unsigned long *phys_pfn); > long (*unpin_pages)(void *iommu_data, unsigned long *pfn, > long npage); > + int (*register_notifier)(void *iommu_data, > + struct notifier_block *nb); > + int (*unregister_notifier)(void *iommu_data, > + struct notifier_block *nb); > }; > > extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); > @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, > extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn, > long npage); > > +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1 > + > +extern int vfio_register_notifier(struct device *dev, > + struct notifier_block *nb); > + > +extern int vfio_unregister_notifier(struct device *dev, > + struct notifier_block *nb); > /* > * IRQfd - generic > */ >
On 10/28/2016 1:03 PM, Jike Song wrote: > On 10/27/2016 05:29 AM, Kirti Wankhede wrote: >> >> +/* >> + * This function finds pfn in domain->external_addr_space->pfn_list for given >> + * iova range. If pfn exist, notify pfn to registered notifier list. On >> + * receiving notifier callback, vendor driver should invalidate the mapping and >> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn >> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while >> + * searching for next vfio_pfn in rb tree, start search from first node again. >> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed >> + * from rb tree and so in next search vfio_pfn would be same as previous >> + * vfio_pfn. In that case, exit from loop. >> + */ >> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, >> + struct vfio_iommu_type1_dma_unmap *unmap) >> +{ >> + struct vfio_domain *domain = iommu->external_domain; >> + struct rb_node *n; >> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; >> + >> + do { >> + prev_vpfn = vpfn; >> + mutex_lock(&domain->external_addr_space->pfn_list_lock); >> + >> + n = rb_first(&domain->external_addr_space->pfn_list); >> + >> + for (; n; n = rb_next(n), vpfn = NULL) { >> + vpfn = rb_entry(n, struct vfio_pfn, node); >> + >> + if ((vpfn->iova >= unmap->iova) && >> + (vpfn->iova < unmap->iova + unmap->size)) >> + break; >> + } >> + >> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); >> + >> + /* Notify any listeners about DMA_UNMAP */ >> + if (vpfn) >> + blocking_notifier_call_chain(&iommu->notifier, >> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, >> + &vpfn->pfn); > > Hi Kirti, > > The information carried by notifier is only a pfn. > > Since your pin/unpin interfaces design, it's the vendor driver who should > guarantee pin/unpin same times. 
That's because there could be multiple mdev devices from different vendor drivers in a domain, and a pin request for a page can come from multiple vendor drivers for the same page, and in the pin/unpin page interface we don't keep track of who had pinned pages. So it's the vendor driver's responsibility to unpin pages which he had pinned. > To achieve that, the vendor driver must > cache it's iova->pfn mapping on its side, to avoid pinning a same page > for multiple times. > That's right. > With the notifier carrying only a pfn, to find the iova by this pfn, > the vendor driver must *also* keep a reverse-mapping. That's a bit > too much. > Don't know what the reverse mapping is, vendor should know the pfn that he has previously pinned to call the unpin anyway so there is no need to duplicate the logic here. Kirti > Since the vendor could also suffer from IOMMU-compatible problem, > which means a local cache is always helpful, so I'd like to have the > iova carried to the notifier. > > What'd you say? >
On Fri, 28 Oct 2016 15:33:58 +0800 Jike Song <jike.song@intel.com> wrote: > On 10/27/2016 05:29 AM, Kirti Wankhede wrote: > > Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers > > about DMA_UNMAP. > > Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). > > Vendor driver should register notifer using these APIs. > > Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate > > mappings. > > > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > Signed-off-by: Neo Jia <cjia@nvidia.com> > > Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 > > --- > > drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ > > drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- > > include/linux/vfio.h | 11 +++++ > > 3 files changed, 163 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > > index 28b50ca14c52..ff05ac6b1e90 100644 > > --- a/drivers/vfio/vfio.c > > +++ b/drivers/vfio/vfio.c > > @@ -1891,6 +1891,79 @@ err_unpin_pages: > > } > > EXPORT_SYMBOL(vfio_unpin_pages); > > > > +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) > > +{ > > + struct vfio_container *container; > > + struct vfio_group *group; > > + struct vfio_iommu_driver *driver; > > + ssize_t ret; > > + > > + if (!dev || !nb) > > + return -EINVAL; > > + > > + group = vfio_group_get_from_dev(dev); > > + if (IS_ERR(group)) > > + return PTR_ERR(group); > > + > > + ret = vfio_group_add_container_user(group); > > + if (ret) > > + goto err_register_nb; > > + > > + container = group->container; > > + down_read(&container->group_lock); > > + > > + driver = container->iommu_driver; > > + if (likely(driver && driver->ops->register_notifier)) > > + ret = driver->ops->register_notifier(container->iommu_data, nb); > > + else > > + ret = -EINVAL; > > + > > + up_read(&container->group_lock); > > + vfio_group_try_dissolve_container(group); > > + > > +err_register_nb: 
> > + vfio_group_put(group); > > + return ret; > > +} > > +EXPORT_SYMBOL(vfio_register_notifier); > > + > > +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) > > +{ > > + struct vfio_container *container; > > + struct vfio_group *group; > > + struct vfio_iommu_driver *driver; > > + ssize_t ret; > > + > > + if (!dev || !nb) > > + return -EINVAL; > > + > > + group = vfio_group_get_from_dev(dev); > > + if (IS_ERR(group)) > > + return PTR_ERR(group); > > + > > + ret = vfio_group_add_container_user(group); > > + if (ret) > > + goto err_unregister_nb; > > + > > + container = group->container; > > + down_read(&container->group_lock); > > + > > + driver = container->iommu_driver; > > + if (likely(driver && driver->ops->unregister_notifier)) > > + ret = driver->ops->unregister_notifier(container->iommu_data, > > + nb); > > + else > > + ret = -EINVAL; > > + > > + up_read(&container->group_lock); > > + vfio_group_try_dissolve_container(group); > > + > > +err_unregister_nb: > > + vfio_group_put(group); > > + return ret; > > +} > > +EXPORT_SYMBOL(vfio_unregister_notifier); > > + > > /** > > * Module/class support > > */ > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > > index 5add11a147e1..a4bd331ac0fd 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -37,6 +37,7 @@ > > #include <linux/vfio.h> > > #include <linux/workqueue.h> > > #include <linux/mdev.h> > > +#include <linux/notifier.h> > > > > #define DRIVER_VERSION "0.2" > > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > > @@ -59,6 +60,7 @@ struct vfio_iommu { > > struct vfio_domain *external_domain; /* domain for external user */ > > struct mutex lock; > > struct rb_root dma_list; > > + struct blocking_notifier_head notifier; > > bool v2; > > bool nesting; > > }; > > @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, > > > > mutex_lock(&iommu->lock); > > > > - if 
(!iommu->external_domain) { > > + /* Fail if notifier list is empty */ > > + if ((!iommu->external_domain) || (!iommu->notifier.head)) { > > ret = -EINVAL; > > goto pin_done; > > } > > @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) > > return bitmap; > > } > > > > +/* > > + * This function finds pfn in domain->external_addr_space->pfn_list for given > > + * iova range. If pfn exist, notify pfn to registered notifier list. On > > + * receiving notifier callback, vendor driver should invalidate the mapping and > > + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > > + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > > + * searching for next vfio_pfn in rb tree, start search from first node again. > > + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > > + * from rb tree and so in next search vfio_pfn would be same as previous > > + * vfio_pfn. In that case, exit from loop. > > + */ > > +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > > + struct vfio_iommu_type1_dma_unmap *unmap) > > +{ > > + struct vfio_domain *domain = iommu->external_domain; > > + struct rb_node *n; > > + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > > + > > + do { > > + prev_vpfn = vpfn; > > + mutex_lock(&domain->external_addr_space->pfn_list_lock); > > + > > + n = rb_first(&domain->external_addr_space->pfn_list); > > + > > + for (; n; n = rb_next(n), vpfn = NULL) { > > + vpfn = rb_entry(n, struct vfio_pfn, node); > > + > > + if ((vpfn->iova >= unmap->iova) && > > + (vpfn->iova < unmap->iova + unmap->size)) > > + break; > > + } > > + > > + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > > + > > + /* Notify any listeners about DMA_UNMAP */ > > + if (vpfn) > > + blocking_notifier_call_chain(&iommu->notifier, > > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > > + &vpfn->pfn); > > Hi Kirti, > > The information carried by notifier is only a pfn. 
> > Since your pin/unpin interfaces design, it's the vendor driver who should > guarantee pin/unpin same times. To achieve that, the vendor driver must > cache it's iova->pfn mapping on its side, to avoid pinning a same page > for multiple times. > > With the notifier carrying only a pfn, to find the iova by this pfn, > the vendor driver must *also* keep a reverse-mapping. That's a bit > too much. > > Since the vendor could also suffer from IOMMU-compatible problem, > which means a local cache is always helpful, so I'd like to have the > iova carried to the notifier. > > What'd you say? I agree, the pfn is not unique, multiple guest pfns (iovas) might be backed by the same host pfn. DMA_UNMAP calls are based on iova, the notifier through to the vendor driver must be based on the same. Thanks, Alex > > + } while (vpfn && (prev_vpfn != vpfn)); > > + > > + WARN_ON(vpfn); > > +} > > + > > static int vfio_dma_do_unmap(struct vfio_iommu *iommu, > > struct vfio_iommu_type1_dma_unmap *unmap) > > { > > @@ -844,6 +891,9 @@ unlock: > > /* Report how much was unmapped */ > > unmap->size = unmapped; > > > > + if (unmapped && iommu->external_domain) > > + vfio_notifier_call_chain(iommu, unmap); > > + > > return ret; > > } > > > > @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) > > INIT_LIST_HEAD(&iommu->domain_list); > > iommu->dma_list = RB_ROOT; > > mutex_init(&iommu->lock); > > + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); > > > > return iommu; > > } > > @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > > return -ENOTTY; > > } > > > > +static int vfio_iommu_type1_register_notifier(void *iommu_data, > > + struct notifier_block *nb) > > +{ > > + struct vfio_iommu *iommu = iommu_data; > > + > > + return blocking_notifier_chain_register(&iommu->notifier, nb); > > +} > > + > > +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, > > + struct notifier_block *nb) > > +{ > > + struct vfio_iommu *iommu = 
iommu_data; > > + > > + return blocking_notifier_chain_unregister(&iommu->notifier, nb); > > +} > > + > > static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { > > - .name = "vfio-iommu-type1", > > - .owner = THIS_MODULE, > > - .open = vfio_iommu_type1_open, > > - .release = vfio_iommu_type1_release, > > - .ioctl = vfio_iommu_type1_ioctl, > > - .attach_group = vfio_iommu_type1_attach_group, > > - .detach_group = vfio_iommu_type1_detach_group, > > - .pin_pages = vfio_iommu_type1_pin_pages, > > - .unpin_pages = vfio_iommu_type1_unpin_pages, > > + .name = "vfio-iommu-type1", > > + .owner = THIS_MODULE, > > + .open = vfio_iommu_type1_open, > > + .release = vfio_iommu_type1_release, > > + .ioctl = vfio_iommu_type1_ioctl, > > + .attach_group = vfio_iommu_type1_attach_group, > > + .detach_group = vfio_iommu_type1_detach_group, > > + .pin_pages = vfio_iommu_type1_pin_pages, > > + .unpin_pages = vfio_iommu_type1_unpin_pages, > > + .register_notifier = vfio_iommu_type1_register_notifier, > > + .unregister_notifier = vfio_iommu_type1_unregister_notifier, > > }; > > > > static int __init vfio_iommu_type1_init(void) > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > > index 0609a2052846..4c91ce8bfaeb 100644 > > --- a/include/linux/vfio.h > > +++ b/include/linux/vfio.h > > @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { > > unsigned long *phys_pfn); > > long (*unpin_pages)(void *iommu_data, unsigned long *pfn, > > long npage); > > + int (*register_notifier)(void *iommu_data, > > + struct notifier_block *nb); > > + int (*unregister_notifier)(void *iommu_data, > > + struct notifier_block *nb); > > }; > > > > extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); > > @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, > > extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn, > > long npage); > > > > +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1 > > + > > +extern int 
vfio_register_notifier(struct device *dev, > > + struct notifier_block *nb); > > + > > +extern int vfio_unregister_notifier(struct device *dev, > > + struct notifier_block *nb); > > /* > > * IRQfd - generic > > */ > > >
On 10/28/2016 6:10 PM, Alex Williamson wrote: > On Fri, 28 Oct 2016 15:33:58 +0800 > Jike Song <jike.song@intel.com> wrote: > >> On 10/27/2016 05:29 AM, Kirti Wankhede wrote: >>> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers >>> about DMA_UNMAP. >>> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). >>> Vendor driver should register notifer using these APIs. >>> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate >>> mappings. >>> >>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >>> Signed-off-by: Neo Jia <cjia@nvidia.com> >>> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 >>> --- >>> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ >>> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- >>> include/linux/vfio.h | 11 +++++ >>> 3 files changed, 163 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c >>> index 28b50ca14c52..ff05ac6b1e90 100644 >>> --- a/drivers/vfio/vfio.c >>> +++ b/drivers/vfio/vfio.c >>> @@ -1891,6 +1891,79 @@ err_unpin_pages: >>> } >>> EXPORT_SYMBOL(vfio_unpin_pages); >>> >>> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) >>> +{ >>> + struct vfio_container *container; >>> + struct vfio_group *group; >>> + struct vfio_iommu_driver *driver; >>> + ssize_t ret; >>> + >>> + if (!dev || !nb) >>> + return -EINVAL; >>> + >>> + group = vfio_group_get_from_dev(dev); >>> + if (IS_ERR(group)) >>> + return PTR_ERR(group); >>> + >>> + ret = vfio_group_add_container_user(group); >>> + if (ret) >>> + goto err_register_nb; >>> + >>> + container = group->container; >>> + down_read(&container->group_lock); >>> + >>> + driver = container->iommu_driver; >>> + if (likely(driver && driver->ops->register_notifier)) >>> + ret = driver->ops->register_notifier(container->iommu_data, nb); >>> + else >>> + ret = -EINVAL; >>> + >>> + up_read(&container->group_lock); >>> + 
vfio_group_try_dissolve_container(group); >>> + >>> +err_register_nb: >>> + vfio_group_put(group); >>> + return ret; >>> +} >>> +EXPORT_SYMBOL(vfio_register_notifier); >>> + >>> +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) >>> +{ >>> + struct vfio_container *container; >>> + struct vfio_group *group; >>> + struct vfio_iommu_driver *driver; >>> + ssize_t ret; >>> + >>> + if (!dev || !nb) >>> + return -EINVAL; >>> + >>> + group = vfio_group_get_from_dev(dev); >>> + if (IS_ERR(group)) >>> + return PTR_ERR(group); >>> + >>> + ret = vfio_group_add_container_user(group); >>> + if (ret) >>> + goto err_unregister_nb; >>> + >>> + container = group->container; >>> + down_read(&container->group_lock); >>> + >>> + driver = container->iommu_driver; >>> + if (likely(driver && driver->ops->unregister_notifier)) >>> + ret = driver->ops->unregister_notifier(container->iommu_data, >>> + nb); >>> + else >>> + ret = -EINVAL; >>> + >>> + up_read(&container->group_lock); >>> + vfio_group_try_dissolve_container(group); >>> + >>> +err_unregister_nb: >>> + vfio_group_put(group); >>> + return ret; >>> +} >>> +EXPORT_SYMBOL(vfio_unregister_notifier); >>> + >>> /** >>> * Module/class support >>> */ >>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >>> index 5add11a147e1..a4bd331ac0fd 100644 >>> --- a/drivers/vfio/vfio_iommu_type1.c >>> +++ b/drivers/vfio/vfio_iommu_type1.c >>> @@ -37,6 +37,7 @@ >>> #include <linux/vfio.h> >>> #include <linux/workqueue.h> >>> #include <linux/mdev.h> >>> +#include <linux/notifier.h> >>> >>> #define DRIVER_VERSION "0.2" >>> #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" >>> @@ -59,6 +60,7 @@ struct vfio_iommu { >>> struct vfio_domain *external_domain; /* domain for external user */ >>> struct mutex lock; >>> struct rb_root dma_list; >>> + struct blocking_notifier_head notifier; >>> bool v2; >>> bool nesting; >>> }; >>> @@ -549,7 +551,8 @@ static long 
vfio_iommu_type1_pin_pages(void *iommu_data, >>> >>> mutex_lock(&iommu->lock); >>> >>> - if (!iommu->external_domain) { >>> + /* Fail if notifier list is empty */ >>> + if ((!iommu->external_domain) || (!iommu->notifier.head)) { >>> ret = -EINVAL; >>> goto pin_done; >>> } >>> @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) >>> return bitmap; >>> } >>> >>> +/* >>> + * This function finds pfn in domain->external_addr_space->pfn_list for given >>> + * iova range. If pfn exist, notify pfn to registered notifier list. On >>> + * receiving notifier callback, vendor driver should invalidate the mapping and >>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn >>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while >>> + * searching for next vfio_pfn in rb tree, start search from first node again. >>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed >>> + * from rb tree and so in next search vfio_pfn would be same as previous >>> + * vfio_pfn. In that case, exit from loop. 
>>> + */ >>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, >>> + struct vfio_iommu_type1_dma_unmap *unmap) >>> +{ >>> + struct vfio_domain *domain = iommu->external_domain; >>> + struct rb_node *n; >>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; >>> + >>> + do { >>> + prev_vpfn = vpfn; >>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); >>> + >>> + n = rb_first(&domain->external_addr_space->pfn_list); >>> + >>> + for (; n; n = rb_next(n), vpfn = NULL) { >>> + vpfn = rb_entry(n, struct vfio_pfn, node); >>> + >>> + if ((vpfn->iova >= unmap->iova) && >>> + (vpfn->iova < unmap->iova + unmap->size)) >>> + break; >>> + } >>> + >>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); >>> + >>> + /* Notify any listeners about DMA_UNMAP */ >>> + if (vpfn) >>> + blocking_notifier_call_chain(&iommu->notifier, >>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, >>> + &vpfn->pfn); >> >> Hi Kirti, >> >> The information carried by notifier is only a pfn. >> >> Since your pin/unpin interfaces design, it's the vendor driver who should >> guarantee pin/unpin same times. To achieve that, the vendor driver must >> cache it's iova->pfn mapping on its side, to avoid pinning a same page >> for multiple times. >> >> With the notifier carrying only a pfn, to find the iova by this pfn, >> the vendor driver must *also* keep a reverse-mapping. That's a bit >> too much. >> >> Since the vendor could also suffer from IOMMU-compatible problem, >> which means a local cache is always helpful, so I'd like to have the >> iova carried to the notifier. >> >> What'd you say? > > I agree, the pfn is not unique, multiple guest pfns (iovas) might be > backed by the same host pfn. DMA_UNMAP calls are based on iova, the > notifier through to the vendor driver must be based on the same. Host pfn should be unique, right? Kirti.
On Sat, 29 Oct 2016 01:32:35 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 10/28/2016 6:10 PM, Alex Williamson wrote: > > On Fri, 28 Oct 2016 15:33:58 +0800 > > Jike Song <jike.song@intel.com> wrote: > > > >> On 10/27/2016 05:29 AM, Kirti Wankhede wrote: > >>> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers > >>> about DMA_UNMAP. > >>> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). > >>> Vendor driver should register notifer using these APIs. > >>> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate > >>> mappings. > >>> > >>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >>> Signed-off-by: Neo Jia <cjia@nvidia.com> > >>> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 > >>> --- > >>> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ > >>> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- > >>> include/linux/vfio.h | 11 +++++ > >>> 3 files changed, 163 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > >>> index 28b50ca14c52..ff05ac6b1e90 100644 > >>> --- a/drivers/vfio/vfio.c > >>> +++ b/drivers/vfio/vfio.c > >>> @@ -1891,6 +1891,79 @@ err_unpin_pages: > >>> } > >>> EXPORT_SYMBOL(vfio_unpin_pages); > >>> > >>> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) > >>> +{ > >>> + struct vfio_container *container; > >>> + struct vfio_group *group; > >>> + struct vfio_iommu_driver *driver; > >>> + ssize_t ret; > >>> + > >>> + if (!dev || !nb) > >>> + return -EINVAL; > >>> + > >>> + group = vfio_group_get_from_dev(dev); > >>> + if (IS_ERR(group)) > >>> + return PTR_ERR(group); > >>> + > >>> + ret = vfio_group_add_container_user(group); > >>> + if (ret) > >>> + goto err_register_nb; > >>> + > >>> + container = group->container; > >>> + down_read(&container->group_lock); > >>> + > >>> + driver = container->iommu_driver; > >>> + if (likely(driver && 
driver->ops->register_notifier)) > >>> + ret = driver->ops->register_notifier(container->iommu_data, nb); > >>> + else > >>> + ret = -EINVAL; > >>> + > >>> + up_read(&container->group_lock); > >>> + vfio_group_try_dissolve_container(group); > >>> + > >>> +err_register_nb: > >>> + vfio_group_put(group); > >>> + return ret; > >>> +} > >>> +EXPORT_SYMBOL(vfio_register_notifier); > >>> + > >>> +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) > >>> +{ > >>> + struct vfio_container *container; > >>> + struct vfio_group *group; > >>> + struct vfio_iommu_driver *driver; > >>> + ssize_t ret; > >>> + > >>> + if (!dev || !nb) > >>> + return -EINVAL; > >>> + > >>> + group = vfio_group_get_from_dev(dev); > >>> + if (IS_ERR(group)) > >>> + return PTR_ERR(group); > >>> + > >>> + ret = vfio_group_add_container_user(group); > >>> + if (ret) > >>> + goto err_unregister_nb; > >>> + > >>> + container = group->container; > >>> + down_read(&container->group_lock); > >>> + > >>> + driver = container->iommu_driver; > >>> + if (likely(driver && driver->ops->unregister_notifier)) > >>> + ret = driver->ops->unregister_notifier(container->iommu_data, > >>> + nb); > >>> + else > >>> + ret = -EINVAL; > >>> + > >>> + up_read(&container->group_lock); > >>> + vfio_group_try_dissolve_container(group); > >>> + > >>> +err_unregister_nb: > >>> + vfio_group_put(group); > >>> + return ret; > >>> +} > >>> +EXPORT_SYMBOL(vfio_unregister_notifier); > >>> + > >>> /** > >>> * Module/class support > >>> */ > >>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > >>> index 5add11a147e1..a4bd331ac0fd 100644 > >>> --- a/drivers/vfio/vfio_iommu_type1.c > >>> +++ b/drivers/vfio/vfio_iommu_type1.c > >>> @@ -37,6 +37,7 @@ > >>> #include <linux/vfio.h> > >>> #include <linux/workqueue.h> > >>> #include <linux/mdev.h> > >>> +#include <linux/notifier.h> > >>> > >>> #define DRIVER_VERSION "0.2" > >>> #define DRIVER_AUTHOR "Alex Williamson 
<alex.williamson@redhat.com>" > >>> @@ -59,6 +60,7 @@ struct vfio_iommu { > >>> struct vfio_domain *external_domain; /* domain for external user */ > >>> struct mutex lock; > >>> struct rb_root dma_list; > >>> + struct blocking_notifier_head notifier; > >>> bool v2; > >>> bool nesting; > >>> }; > >>> @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, > >>> > >>> mutex_lock(&iommu->lock); > >>> > >>> - if (!iommu->external_domain) { > >>> + /* Fail if notifier list is empty */ > >>> + if ((!iommu->external_domain) || (!iommu->notifier.head)) { > >>> ret = -EINVAL; > >>> goto pin_done; > >>> } > >>> @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) > >>> return bitmap; > >>> } > >>> > >>> +/* > >>> + * This function finds pfn in domain->external_addr_space->pfn_list for given > >>> + * iova range. If pfn exist, notify pfn to registered notifier list. On > >>> + * receiving notifier callback, vendor driver should invalidate the mapping and > >>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > >>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > >>> + * searching for next vfio_pfn in rb tree, start search from first node again. > >>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > >>> + * from rb tree and so in next search vfio_pfn would be same as previous > >>> + * vfio_pfn. In that case, exit from loop. 
> >>> + */ > >>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > >>> + struct vfio_iommu_type1_dma_unmap *unmap) > >>> +{ > >>> + struct vfio_domain *domain = iommu->external_domain; > >>> + struct rb_node *n; > >>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > >>> + > >>> + do { > >>> + prev_vpfn = vpfn; > >>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); > >>> + > >>> + n = rb_first(&domain->external_addr_space->pfn_list); > >>> + > >>> + for (; n; n = rb_next(n), vpfn = NULL) { > >>> + vpfn = rb_entry(n, struct vfio_pfn, node); > >>> + > >>> + if ((vpfn->iova >= unmap->iova) && > >>> + (vpfn->iova < unmap->iova + unmap->size)) > >>> + break; > >>> + } > >>> + > >>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > >>> + > >>> + /* Notify any listeners about DMA_UNMAP */ > >>> + if (vpfn) > >>> + blocking_notifier_call_chain(&iommu->notifier, > >>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > >>> + &vpfn->pfn); > >> > >> Hi Kirti, > >> > >> The information carried by notifier is only a pfn. > >> > >> Since your pin/unpin interfaces design, it's the vendor driver who should > >> guarantee pin/unpin same times. To achieve that, the vendor driver must > >> cache it's iova->pfn mapping on its side, to avoid pinning a same page > >> for multiple times. > >> > >> With the notifier carrying only a pfn, to find the iova by this pfn, > >> the vendor driver must *also* keep a reverse-mapping. That's a bit > >> too much. > >> > >> Since the vendor could also suffer from IOMMU-compatible problem, > >> which means a local cache is always helpful, so I'd like to have the > >> iova carried to the notifier. > >> > >> What'd you say? > > > > I agree, the pfn is not unique, multiple guest pfns (iovas) might be > > backed by the same host pfn. DMA_UNMAP calls are based on iova, the > > notifier through to the vendor driver must be based on the same. > > Host pfn should be unique, right? 
Let's say a user does a malloc of a single page and does 100 calls to MAP_DMA populating 100 pages of IOVA space all backed by the same malloc'd page. This is valid, I have unit tests that do essentially this. Those will all have the same pfn. The user then does an UNMAP_DMA to a single one of those IOVA pages. Did the user unmap everything matching that pfn? Of course not, they only unmapped that one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor driver MUST therefore also work based on IOVA. This is not an academic problem, address space aliases exist in real VMs, imagine a virtual IOMMU. Thanks, Alex
On 10/29/2016 2:03 AM, Alex Williamson wrote: > On Sat, 29 Oct 2016 01:32:35 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> On 10/28/2016 6:10 PM, Alex Williamson wrote: >>> On Fri, 28 Oct 2016 15:33:58 +0800 >>> Jike Song <jike.song@intel.com> wrote: >>> ... >>>>> >>>>> +/* >>>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given >>>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On >>>>> + * receiving notifier callback, vendor driver should invalidate the mapping and >>>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn >>>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while >>>>> + * searching for next vfio_pfn in rb tree, start search from first node again. >>>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed >>>>> + * from rb tree and so in next search vfio_pfn would be same as previous >>>>> + * vfio_pfn. In that case, exit from loop. 
>>>>> + */ >>>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, >>>>> + struct vfio_iommu_type1_dma_unmap *unmap) >>>>> +{ >>>>> + struct vfio_domain *domain = iommu->external_domain; >>>>> + struct rb_node *n; >>>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; >>>>> + >>>>> + do { >>>>> + prev_vpfn = vpfn; >>>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); >>>>> + >>>>> + n = rb_first(&domain->external_addr_space->pfn_list); >>>>> + >>>>> + for (; n; n = rb_next(n), vpfn = NULL) { >>>>> + vpfn = rb_entry(n, struct vfio_pfn, node); >>>>> + >>>>> + if ((vpfn->iova >= unmap->iova) && >>>>> + (vpfn->iova < unmap->iova + unmap->size)) >>>>> + break; >>>>> + } >>>>> + >>>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); >>>>> + >>>>> + /* Notify any listeners about DMA_UNMAP */ >>>>> + if (vpfn) >>>>> + blocking_notifier_call_chain(&iommu->notifier, >>>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, >>>>> + &vpfn->pfn); >>>> >>>> Hi Kirti, >>>> >>>> The information carried by notifier is only a pfn. >>>> >>>> Since your pin/unpin interfaces design, it's the vendor driver who should >>>> guarantee pin/unpin same times. To achieve that, the vendor driver must >>>> cache it's iova->pfn mapping on its side, to avoid pinning a same page >>>> for multiple times. >>>> >>>> With the notifier carrying only a pfn, to find the iova by this pfn, >>>> the vendor driver must *also* keep a reverse-mapping. That's a bit >>>> too much. >>>> >>>> Since the vendor could also suffer from IOMMU-compatible problem, >>>> which means a local cache is always helpful, so I'd like to have the >>>> iova carried to the notifier. >>>> >>>> What'd you say? >>> >>> I agree, the pfn is not unique, multiple guest pfns (iovas) might be >>> backed by the same host pfn. DMA_UNMAP calls are based on iova, the >>> notifier through to the vendor driver must be based on the same. >> >> Host pfn should be unique, right? 
> > Let's say a user does a malloc of a single page and does 100 calls to > MAP_DMA populating 100 pages of IOVA space all backed by the same > malloc'd page. This is valid, I have unit tests that do essentially > this. Those will all have the same pfn. The user then does an > UNMAP_DMA to a single one of those IOVA pages. Did the user unmap > everything matching that pfn? Of course not, they only unmapped that > one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. > UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor > driver MUST therefore also work based on IOVA. This is not an academic > problem, address space aliases exist in real VMs, imagine a virtual > IOMMU. Thanks, > So struct vfio_iommu_type1_dma_unmap should be passed as argument to notifier callback: if (unmapped && iommu->external_domain) - vfio_notifier_call_chain(iommu, unmap); + blocking_notifier_call_chain(&iommu->notifier, + VFIO_IOMMU_NOTIFY_DMA_UNMAP, + unmap); Then vendor driver should find pfns he has pinned from this range of iovas, then invalidate and unpin pfns. Right? Thanks, Kirti
On Sat, 29 Oct 2016 16:07:05 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > On 10/29/2016 2:03 AM, Alex Williamson wrote: > > On Sat, 29 Oct 2016 01:32:35 +0530 > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > >> On 10/28/2016 6:10 PM, Alex Williamson wrote: > >>> On Fri, 28 Oct 2016 15:33:58 +0800 > >>> Jike Song <jike.song@intel.com> wrote: > >>> > ... > >>>>> > >>>>> +/* > >>>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given > >>>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On > >>>>> + * receiving notifier callback, vendor driver should invalidate the mapping and > >>>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > >>>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > >>>>> + * searching for next vfio_pfn in rb tree, start search from first node again. > >>>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > >>>>> + * from rb tree and so in next search vfio_pfn would be same as previous > >>>>> + * vfio_pfn. In that case, exit from loop. 
> >>>>> + */ > >>>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > >>>>> + struct vfio_iommu_type1_dma_unmap *unmap) > >>>>> +{ > >>>>> + struct vfio_domain *domain = iommu->external_domain; > >>>>> + struct rb_node *n; > >>>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > >>>>> + > >>>>> + do { > >>>>> + prev_vpfn = vpfn; > >>>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); > >>>>> + > >>>>> + n = rb_first(&domain->external_addr_space->pfn_list); > >>>>> + > >>>>> + for (; n; n = rb_next(n), vpfn = NULL) { > >>>>> + vpfn = rb_entry(n, struct vfio_pfn, node); > >>>>> + > >>>>> + if ((vpfn->iova >= unmap->iova) && > >>>>> + (vpfn->iova < unmap->iova + unmap->size)) > >>>>> + break; > >>>>> + } > >>>>> + > >>>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > >>>>> + > >>>>> + /* Notify any listeners about DMA_UNMAP */ > >>>>> + if (vpfn) > >>>>> + blocking_notifier_call_chain(&iommu->notifier, > >>>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > >>>>> + &vpfn->pfn); > >>>> > >>>> Hi Kirti, > >>>> > >>>> The information carried by notifier is only a pfn. > >>>> > >>>> Since your pin/unpin interfaces design, it's the vendor driver who should > >>>> guarantee pin/unpin same times. To achieve that, the vendor driver must > >>>> cache it's iova->pfn mapping on its side, to avoid pinning a same page > >>>> for multiple times. > >>>> > >>>> With the notifier carrying only a pfn, to find the iova by this pfn, > >>>> the vendor driver must *also* keep a reverse-mapping. That's a bit > >>>> too much. > >>>> > >>>> Since the vendor could also suffer from IOMMU-compatible problem, > >>>> which means a local cache is always helpful, so I'd like to have the > >>>> iova carried to the notifier. > >>>> > >>>> What'd you say? > >>> > >>> I agree, the pfn is not unique, multiple guest pfns (iovas) might be > >>> backed by the same host pfn. 
DMA_UNMAP calls are based on iova, the > >>> notifier through to the vendor driver must be based on the same. > >> > >> Host pfn should be unique, right? > > > > Let's say a user does a malloc of a single page and does 100 calls to > > MAP_DMA populating 100 pages of IOVA space all backed by the same > > malloc'd page. This is valid, I have unit tests that do essentially > > this. Those will all have the same pfn. The user then does an > > UNMAP_DMA to a single one of those IOVA pages. Did the user unmap > > everything matching that pfn? Of course not, they only unmapped that > > one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. > > UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor > > driver MUST therefore also work based on IOVA. This is not an academic > > problem, address space aliases exist in real VMs, imagine a virtual > > IOMMU. Thanks, > > > > > So struct vfio_iommu_type1_dma_unmap should be passed as argument to > notifier callback: > > if (unmapped && iommu->external_domain) > - vfio_notifier_call_chain(iommu, unmap); > + blocking_notifier_call_chain(&iommu->notifier, > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > + unmap); > > Then vendor driver should find pfns he has pinned from this range of > iovas, then invalidate and unpin pfns. Right? That seems like a valid choice. It's probably better than calling the notifier for each page of iova. Thanks, Alex
On 10/27/2016 05:29 AM, Kirti Wankhede wrote: > Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers > about DMA_UNMAP. > Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). > Vendor driver should register notifer using these APIs. > Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate > mappings. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Signed-off-by: Neo Jia <cjia@nvidia.com> > Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 > --- > drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ > drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- > include/linux/vfio.h | 11 +++++ > 3 files changed, 163 insertions(+), 10 deletions(-) > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 28b50ca14c52..ff05ac6b1e90 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -1891,6 +1891,79 @@ err_unpin_pages: > } > EXPORT_SYMBOL(vfio_unpin_pages); > > +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) > +{ Hi Kirti, Given that below 4 methods are members of vfio_iommu_driver_ops: pin_pages unpin_pages register_notifier unregister_notifier the names of exposed VFIO APIs could possibly be clearer: vfio_iommu_pin_pages vfio_iommu_unpin_pages vfio_iommu_register_notifier vfio_iommu_unreigster_nodier -- Thanks, Jike > + struct vfio_container *container; > + struct vfio_group *group; > + struct vfio_iommu_driver *driver; > + ssize_t ret; > + > + if (!dev || !nb) > + return -EINVAL; > + > + group = vfio_group_get_from_dev(dev); > + if (IS_ERR(group)) > + return PTR_ERR(group); > + > + ret = vfio_group_add_container_user(group); > + if (ret) > + goto err_register_nb; > + > + container = group->container; > + down_read(&container->group_lock); > + > + driver = container->iommu_driver; > + if (likely(driver && driver->ops->register_notifier)) > + ret = driver->ops->register_notifier(container->iommu_data, nb); > + else > + 
ret = -EINVAL; > + > + up_read(&container->group_lock); > + vfio_group_try_dissolve_container(group); > + > +err_register_nb: > + vfio_group_put(group); > + return ret; > +} > +EXPORT_SYMBOL(vfio_register_notifier); > + > +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) > +{ > + struct vfio_container *container; > + struct vfio_group *group; > + struct vfio_iommu_driver *driver; > + ssize_t ret; > + > + if (!dev || !nb) > + return -EINVAL; > + > + group = vfio_group_get_from_dev(dev); > + if (IS_ERR(group)) > + return PTR_ERR(group); > + > + ret = vfio_group_add_container_user(group); > + if (ret) > + goto err_unregister_nb; > + > + container = group->container; > + down_read(&container->group_lock); > + > + driver = container->iommu_driver; > + if (likely(driver && driver->ops->unregister_notifier)) > + ret = driver->ops->unregister_notifier(container->iommu_data, > + nb); > + else > + ret = -EINVAL; > + > + up_read(&container->group_lock); > + vfio_group_try_dissolve_container(group); > + > +err_unregister_nb: > + vfio_group_put(group); > + return ret; > +} > +EXPORT_SYMBOL(vfio_unregister_notifier); > + > /** > * Module/class support > */ > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 5add11a147e1..a4bd331ac0fd 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -37,6 +37,7 @@ > #include <linux/vfio.h> > #include <linux/workqueue.h> > #include <linux/mdev.h> > +#include <linux/notifier.h> > > #define DRIVER_VERSION "0.2" > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" > @@ -59,6 +60,7 @@ struct vfio_iommu { > struct vfio_domain *external_domain; /* domain for external user */ > struct mutex lock; > struct rb_root dma_list; > + struct blocking_notifier_head notifier; > bool v2; > bool nesting; > }; > @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, > > mutex_lock(&iommu->lock); > > - if 
(!iommu->external_domain) { > + /* Fail if notifier list is empty */ > + if ((!iommu->external_domain) || (!iommu->notifier.head)) { > ret = -EINVAL; > goto pin_done; > } > @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) > return bitmap; > } > > +/* > + * This function finds pfn in domain->external_addr_space->pfn_list for given > + * iova range. If pfn exist, notify pfn to registered notifier list. On > + * receiving notifier callback, vendor driver should invalidate the mapping and > + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > + * searching for next vfio_pfn in rb tree, start search from first node again. > + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > + * from rb tree and so in next search vfio_pfn would be same as previous > + * vfio_pfn. In that case, exit from loop. > + */ > +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > + struct vfio_iommu_type1_dma_unmap *unmap) > +{ > + struct vfio_domain *domain = iommu->external_domain; > + struct rb_node *n; > + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > + > + do { > + prev_vpfn = vpfn; > + mutex_lock(&domain->external_addr_space->pfn_list_lock); > + > + n = rb_first(&domain->external_addr_space->pfn_list); > + > + for (; n; n = rb_next(n), vpfn = NULL) { > + vpfn = rb_entry(n, struct vfio_pfn, node); > + > + if ((vpfn->iova >= unmap->iova) && > + (vpfn->iova < unmap->iova + unmap->size)) > + break; > + } > + > + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > + > + /* Notify any listeners about DMA_UNMAP */ > + if (vpfn) > + blocking_notifier_call_chain(&iommu->notifier, > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > + &vpfn->pfn); > + } while (vpfn && (prev_vpfn != vpfn)); > + > + WARN_ON(vpfn); > +} > + > static int vfio_dma_do_unmap(struct vfio_iommu *iommu, > struct vfio_iommu_type1_dma_unmap *unmap) > { > 
@@ -844,6 +891,9 @@ unlock: > /* Report how much was unmapped */ > unmap->size = unmapped; > > + if (unmapped && iommu->external_domain) > + vfio_notifier_call_chain(iommu, unmap); > + > return ret; > } > > @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) > INIT_LIST_HEAD(&iommu->domain_list); > iommu->dma_list = RB_ROOT; > mutex_init(&iommu->lock); > + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); > > return iommu; > } > @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > return -ENOTTY; > } > > +static int vfio_iommu_type1_register_notifier(void *iommu_data, > + struct notifier_block *nb) > +{ > + struct vfio_iommu *iommu = iommu_data; > + > + return blocking_notifier_chain_register(&iommu->notifier, nb); > +} > + > +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, > + struct notifier_block *nb) > +{ > + struct vfio_iommu *iommu = iommu_data; > + > + return blocking_notifier_chain_unregister(&iommu->notifier, nb); > +} > + > static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { > - .name = "vfio-iommu-type1", > - .owner = THIS_MODULE, > - .open = vfio_iommu_type1_open, > - .release = vfio_iommu_type1_release, > - .ioctl = vfio_iommu_type1_ioctl, > - .attach_group = vfio_iommu_type1_attach_group, > - .detach_group = vfio_iommu_type1_detach_group, > - .pin_pages = vfio_iommu_type1_pin_pages, > - .unpin_pages = vfio_iommu_type1_unpin_pages, > + .name = "vfio-iommu-type1", > + .owner = THIS_MODULE, > + .open = vfio_iommu_type1_open, > + .release = vfio_iommu_type1_release, > + .ioctl = vfio_iommu_type1_ioctl, > + .attach_group = vfio_iommu_type1_attach_group, > + .detach_group = vfio_iommu_type1_detach_group, > + .pin_pages = vfio_iommu_type1_pin_pages, > + .unpin_pages = vfio_iommu_type1_unpin_pages, > + .register_notifier = vfio_iommu_type1_register_notifier, > + .unregister_notifier = vfio_iommu_type1_unregister_notifier, > }; > > static int __init 
vfio_iommu_type1_init(void) > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index 0609a2052846..4c91ce8bfaeb 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { > unsigned long *phys_pfn); > long (*unpin_pages)(void *iommu_data, unsigned long *pfn, > long npage); > + int (*register_notifier)(void *iommu_data, > + struct notifier_block *nb); > + int (*unregister_notifier)(void *iommu_data, > + struct notifier_block *nb); > }; > > extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); > @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, > extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn, > long npage); > > +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1 > + > +extern int vfio_register_notifier(struct device *dev, > + struct notifier_block *nb); > + > +extern int vfio_unregister_notifier(struct device *dev, > + struct notifier_block *nb); > /* > * IRQfd - generic > */ >
On 10/31/2016 9:20 AM, Jike Song wrote: > On 10/27/2016 05:29 AM, Kirti Wankhede wrote: >> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers >> about DMA_UNMAP. >> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). >> Vendor driver should register notifer using these APIs. >> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate >> mappings. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Signed-off-by: Neo Jia <cjia@nvidia.com> >> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 >> --- >> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ >> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- >> include/linux/vfio.h | 11 +++++ >> 3 files changed, 163 insertions(+), 10 deletions(-) >> >> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c >> index 28b50ca14c52..ff05ac6b1e90 100644 >> --- a/drivers/vfio/vfio.c >> +++ b/drivers/vfio/vfio.c >> @@ -1891,6 +1891,79 @@ err_unpin_pages: >> } >> EXPORT_SYMBOL(vfio_unpin_pages); >> >> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) >> +{ > > Hi Kirti, > > Given that below 4 methods are members of vfio_iommu_driver_ops: > > pin_pages > unpin_pages > register_notifier > unregister_notifier > > the names of exposed VFIO APIs could possibly be clearer: > > vfio_iommu_pin_pages > vfio_iommu_unpin_pages > vfio_iommu_register_notifier > vfio_iommu_unreigster_nodier > Hey Jike, I had followed the same style as other members in this structure: attach_group detach_group Thanks, Kirti
On 10/31/2016 01:59 PM, Kirti Wankhede wrote: > On 10/31/2016 9:20 AM, Jike Song wrote: >> On 10/27/2016 05:29 AM, Kirti Wankhede wrote: >>> Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers >>> about DMA_UNMAP. >>> Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). >>> Vendor driver should register notifer using these APIs. >>> Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate >>> mappings. >>> >>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >>> Signed-off-by: Neo Jia <cjia@nvidia.com> >>> Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 >>> --- >>> drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ >>> drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- >>> include/linux/vfio.h | 11 +++++ >>> 3 files changed, 163 insertions(+), 10 deletions(-) >>> >>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c >>> index 28b50ca14c52..ff05ac6b1e90 100644 >>> --- a/drivers/vfio/vfio.c >>> +++ b/drivers/vfio/vfio.c >>> @@ -1891,6 +1891,79 @@ err_unpin_pages: >>> } >>> EXPORT_SYMBOL(vfio_unpin_pages); >>> >>> +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) >>> +{ >> >> Hi Kirti, >> >> Given that below 4 methods are members of vfio_iommu_driver_ops: >> >> pin_pages >> unpin_pages >> register_notifier >> unregister_notifier >> >> the names of exposed VFIO APIs could possibly be clearer: >> >> vfio_iommu_pin_pages >> vfio_iommu_unpin_pages >> vfio_iommu_register_notifier >> vfio_iommu_unreigster_nodier >> > > Hey Jike, > > I had followed the same style as other members in this structure: > > attach_group > detach_group > I mean the APIs exposed. For example, vfio_register_notifier() is somehow by the name too generic to know what it is provided for. -- Thanks, Jike
* Alex Williamson <alex.williamson@redhat.com> [2016-10-29 08:03:01 -0600]: > On Sat, 29 Oct 2016 16:07:05 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > On 10/29/2016 2:03 AM, Alex Williamson wrote: > > > On Sat, 29 Oct 2016 01:32:35 +0530 > > > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > > > >> On 10/28/2016 6:10 PM, Alex Williamson wrote: > > >>> On Fri, 28 Oct 2016 15:33:58 +0800 > > >>> Jike Song <jike.song@intel.com> wrote: > > >>> > > ... > > >>>>> > > >>>>> +/* > > >>>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given > > >>>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On > > >>>>> + * receiving notifier callback, vendor driver should invalidate the mapping and > > >>>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > > >>>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > > >>>>> + * searching for next vfio_pfn in rb tree, start search from first node again. > > >>>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > > >>>>> + * from rb tree and so in next search vfio_pfn would be same as previous > > >>>>> + * vfio_pfn. In that case, exit from loop. 
> > >>>>> + */ > > >>>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > > >>>>> + struct vfio_iommu_type1_dma_unmap *unmap) > > >>>>> +{ > > >>>>> + struct vfio_domain *domain = iommu->external_domain; > > >>>>> + struct rb_node *n; > > >>>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > > >>>>> + > > >>>>> + do { > > >>>>> + prev_vpfn = vpfn; > > >>>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); > > >>>>> + > > >>>>> + n = rb_first(&domain->external_addr_space->pfn_list); > > >>>>> + > > >>>>> + for (; n; n = rb_next(n), vpfn = NULL) { > > >>>>> + vpfn = rb_entry(n, struct vfio_pfn, node); > > >>>>> + > > >>>>> + if ((vpfn->iova >= unmap->iova) && > > >>>>> + (vpfn->iova < unmap->iova + unmap->size)) > > >>>>> + break; > > >>>>> + } > > >>>>> + > > >>>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > > >>>>> + > > >>>>> + /* Notify any listeners about DMA_UNMAP */ > > >>>>> + if (vpfn) > > >>>>> + blocking_notifier_call_chain(&iommu->notifier, > > >>>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > > >>>>> + &vpfn->pfn); > > >>>> > > >>>> Hi Kirti, > > >>>> > > >>>> The information carried by notifier is only a pfn. > > >>>> > > >>>> Since your pin/unpin interfaces design, it's the vendor driver who should > > >>>> guarantee pin/unpin same times. To achieve that, the vendor driver must > > >>>> cache it's iova->pfn mapping on its side, to avoid pinning a same page > > >>>> for multiple times. > > >>>> > > >>>> With the notifier carrying only a pfn, to find the iova by this pfn, > > >>>> the vendor driver must *also* keep a reverse-mapping. That's a bit > > >>>> too much. > > >>>> > > >>>> Since the vendor could also suffer from IOMMU-compatible problem, > > >>>> which means a local cache is always helpful, so I'd like to have the > > >>>> iova carried to the notifier. > > >>>> > > >>>> What'd you say? > > >>> > > >>> I agree, the pfn is not unique, multiple guest pfns (iovas) might be > > >>> backed by the same host pfn. 
DMA_UNMAP calls are based on iova, the > > >>> notifier through to the vendor driver must be based on the same. > > >> > > >> Host pfn should be unique, right? > > > > > > Let's say a user does a malloc of a single page and does 100 calls to > > > MAP_DMA populating 100 pages of IOVA space all backed by the same > > > malloc'd page. This is valid, I have unit tests that do essentially > > > this. Those will all have the same pfn. The user then does an > > > UNMAP_DMA to a single one of those IOVA pages. Did the user unmap > > > everything matching that pfn? Of course not, they only unmapped that > > > one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. > > > UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor > > > driver MUST therefore also work based on IOVA. This is not an academic > > > problem, address space aliases exist in real VMs, imagine a virtual > > > IOMMU. Thanks, > > > > > > > > > So struct vfio_iommu_type1_dma_unmap should be passed as argument to > > notifier callback: > > > > if (unmapped && iommu->external_domain) > > - vfio_notifier_call_chain(iommu, unmap); > > + blocking_notifier_call_chain(&iommu->notifier, > > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > > + unmap); > > > > Then vendor driver should find pfns he has pinned from this range of > > iovas, then invalidate and unpin pfns. Right? > > That seems like a valid choice. It's probably better than calling the > notifier for each page of iova. Thanks, > > Alex > Hi Kirti, This version requires the *vendor driver* call vfio_register_notifier for an mdev device before any pinning operations. I guess all of the vendor drivers may have some alike code for notifier registration/unregistration. My question is, how about letting the mdev framework managing the notifier registration/unregistration process? We could add a notifier_fn_t callback to "struct parent_ops", then the mdev framework should make sure that the vendor driver assigned a value to this callback. 
The mdev core could initialize a notifier_block for each parent driver with its callback, and register/unregister it with vfio at the right time.
On 11/1/2016 9:15 AM, Dong Jia Shi wrote: > * Alex Williamson <alex.williamson@redhat.com> [2016-10-29 08:03:01 -0600]: > >> On Sat, 29 Oct 2016 16:07:05 +0530 >> Kirti Wankhede <kwankhede@nvidia.com> wrote: >> >>> On 10/29/2016 2:03 AM, Alex Williamson wrote: >>>> On Sat, 29 Oct 2016 01:32:35 +0530 >>>> Kirti Wankhede <kwankhede@nvidia.com> wrote: >>>> >>>>> On 10/28/2016 6:10 PM, Alex Williamson wrote: >>>>>> On Fri, 28 Oct 2016 15:33:58 +0800 >>>>>> Jike Song <jike.song@intel.com> wrote: >>>>>> >>> ... >>>>>>>> >>>>>>>> +/* >>>>>>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given >>>>>>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On >>>>>>>> + * receiving notifier callback, vendor driver should invalidate the mapping and >>>>>>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn >>>>>>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while >>>>>>>> + * searching for next vfio_pfn in rb tree, start search from first node again. >>>>>>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed >>>>>>>> + * from rb tree and so in next search vfio_pfn would be same as previous >>>>>>>> + * vfio_pfn. In that case, exit from loop. 
>>>>>>>> + */ >>>>>>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, >>>>>>>> + struct vfio_iommu_type1_dma_unmap *unmap) >>>>>>>> +{ >>>>>>>> + struct vfio_domain *domain = iommu->external_domain; >>>>>>>> + struct rb_node *n; >>>>>>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; >>>>>>>> + >>>>>>>> + do { >>>>>>>> + prev_vpfn = vpfn; >>>>>>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); >>>>>>>> + >>>>>>>> + n = rb_first(&domain->external_addr_space->pfn_list); >>>>>>>> + >>>>>>>> + for (; n; n = rb_next(n), vpfn = NULL) { >>>>>>>> + vpfn = rb_entry(n, struct vfio_pfn, node); >>>>>>>> + >>>>>>>> + if ((vpfn->iova >= unmap->iova) && >>>>>>>> + (vpfn->iova < unmap->iova + unmap->size)) >>>>>>>> + break; >>>>>>>> + } >>>>>>>> + >>>>>>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); >>>>>>>> + >>>>>>>> + /* Notify any listeners about DMA_UNMAP */ >>>>>>>> + if (vpfn) >>>>>>>> + blocking_notifier_call_chain(&iommu->notifier, >>>>>>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, >>>>>>>> + &vpfn->pfn); >>>>>>> >>>>>>> Hi Kirti, >>>>>>> >>>>>>> The information carried by notifier is only a pfn. >>>>>>> >>>>>>> Since your pin/unpin interfaces design, it's the vendor driver who should >>>>>>> guarantee pin/unpin same times. To achieve that, the vendor driver must >>>>>>> cache it's iova->pfn mapping on its side, to avoid pinning a same page >>>>>>> for multiple times. >>>>>>> >>>>>>> With the notifier carrying only a pfn, to find the iova by this pfn, >>>>>>> the vendor driver must *also* keep a reverse-mapping. That's a bit >>>>>>> too much. >>>>>>> >>>>>>> Since the vendor could also suffer from IOMMU-compatible problem, >>>>>>> which means a local cache is always helpful, so I'd like to have the >>>>>>> iova carried to the notifier. >>>>>>> >>>>>>> What'd you say? >>>>>> >>>>>> I agree, the pfn is not unique, multiple guest pfns (iovas) might be >>>>>> backed by the same host pfn. 
DMA_UNMAP calls are based on iova, the >>>>>> notifier through to the vendor driver must be based on the same. >>>>> >>>>> Host pfn should be unique, right? >>>> >>>> Let's say a user does a malloc of a single page and does 100 calls to >>>> MAP_DMA populating 100 pages of IOVA space all backed by the same >>>> malloc'd page. This is valid, I have unit tests that do essentially >>>> this. Those will all have the same pfn. The user then does an >>>> UNMAP_DMA to a single one of those IOVA pages. Did the user unmap >>>> everything matching that pfn? Of course not, they only unmapped that >>>> one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. >>>> UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor >>>> driver MUST therefore also work based on IOVA. This is not an academic >>>> problem, address space aliases exist in real VMs, imagine a virtual >>>> IOMMU. Thanks, >>>> >>> >>> >>> So struct vfio_iommu_type1_dma_unmap should be passed as argument to >>> notifier callback: >>> >>> if (unmapped && iommu->external_domain) >>> - vfio_notifier_call_chain(iommu, unmap); >>> + blocking_notifier_call_chain(&iommu->notifier, >>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, >>> + unmap); >>> >>> Then vendor driver should find pfns he has pinned from this range of >>> iovas, then invalidate and unpin pfns. Right? >> >> That seems like a valid choice. It's probably better than calling the >> notifier for each page of iova. Thanks, >> >> Alex >> > Hi Kirti, > > This version requires the *vendor driver* call vfio_register_notifier > for an mdev device before any pinning operations. I guess all of the > vendor drivers may have some alike code for notifier > registration/unregistration. > > My question is, how about letting the mdev framework managing the > notifier registration/unregistration process? 
> > We could add a notifier_fn_t callback to "struct parent_ops", then the > mdev framework should make sure that the vendor driver assigned a value > to this callback. The mdev core could initiate a notifier_block for each > parent driver with its callback, and register/unregister it to vfio in > the right time. > Module mdev_core is independent of VFIO so far and it should remain independent of the VFIO module. It's a good suggestion to have a notifier callback in parent_ops, but we shouldn't call vfio_register_notifier()/ vfio_unregister_notifier() from the mdev core module. The vfio_mdev module would take care of registering/unregistering the notifier with vfio from its vfio_mdev_open()/ vfio_mdev_release() calls. That looks cleaner to me. The notifier callback in parent_ops should be optional, since not all vendor drivers pin/unpin pages — for example, the sample mtty driver. The notifier would be registered with the vfio module only if the notifier callback is provided by the vendor driver. If this looks reasonable, I'll include this change in the next version of my patch series. Thanks, Kirti
* Kirti Wankhede <kwankhede@nvidia.com> [2016-11-01 13:17:19 +0530]: > > > On 11/1/2016 9:15 AM, Dong Jia Shi wrote: > > * Alex Williamson <alex.williamson@redhat.com> [2016-10-29 08:03:01 -0600]: > > > >> On Sat, 29 Oct 2016 16:07:05 +0530 > >> Kirti Wankhede <kwankhede@nvidia.com> wrote: > >> > >>> On 10/29/2016 2:03 AM, Alex Williamson wrote: > >>>> On Sat, 29 Oct 2016 01:32:35 +0530 > >>>> Kirti Wankhede <kwankhede@nvidia.com> wrote: > >>>> > >>>>> On 10/28/2016 6:10 PM, Alex Williamson wrote: > >>>>>> On Fri, 28 Oct 2016 15:33:58 +0800 > >>>>>> Jike Song <jike.song@intel.com> wrote: > >>>>>> > >>> ... > >>>>>>>> > >>>>>>>> +/* > >>>>>>>> + * This function finds pfn in domain->external_addr_space->pfn_list for given > >>>>>>>> + * iova range. If pfn exist, notify pfn to registered notifier list. On > >>>>>>>> + * receiving notifier callback, vendor driver should invalidate the mapping and > >>>>>>>> + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > >>>>>>>> + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > >>>>>>>> + * searching for next vfio_pfn in rb tree, start search from first node again. > >>>>>>>> + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > >>>>>>>> + * from rb tree and so in next search vfio_pfn would be same as previous > >>>>>>>> + * vfio_pfn. In that case, exit from loop. 
> >>>>>>>> + */ > >>>>>>>> +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > >>>>>>>> + struct vfio_iommu_type1_dma_unmap *unmap) > >>>>>>>> +{ > >>>>>>>> + struct vfio_domain *domain = iommu->external_domain; > >>>>>>>> + struct rb_node *n; > >>>>>>>> + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > >>>>>>>> + > >>>>>>>> + do { > >>>>>>>> + prev_vpfn = vpfn; > >>>>>>>> + mutex_lock(&domain->external_addr_space->pfn_list_lock); > >>>>>>>> + > >>>>>>>> + n = rb_first(&domain->external_addr_space->pfn_list); > >>>>>>>> + > >>>>>>>> + for (; n; n = rb_next(n), vpfn = NULL) { > >>>>>>>> + vpfn = rb_entry(n, struct vfio_pfn, node); > >>>>>>>> + > >>>>>>>> + if ((vpfn->iova >= unmap->iova) && > >>>>>>>> + (vpfn->iova < unmap->iova + unmap->size)) > >>>>>>>> + break; > >>>>>>>> + } > >>>>>>>> + > >>>>>>>> + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > >>>>>>>> + > >>>>>>>> + /* Notify any listeners about DMA_UNMAP */ > >>>>>>>> + if (vpfn) > >>>>>>>> + blocking_notifier_call_chain(&iommu->notifier, > >>>>>>>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > >>>>>>>> + &vpfn->pfn); > >>>>>>> > >>>>>>> Hi Kirti, > >>>>>>> > >>>>>>> The information carried by notifier is only a pfn. > >>>>>>> > >>>>>>> Since your pin/unpin interfaces design, it's the vendor driver who should > >>>>>>> guarantee pin/unpin same times. To achieve that, the vendor driver must > >>>>>>> cache it's iova->pfn mapping on its side, to avoid pinning a same page > >>>>>>> for multiple times. > >>>>>>> > >>>>>>> With the notifier carrying only a pfn, to find the iova by this pfn, > >>>>>>> the vendor driver must *also* keep a reverse-mapping. That's a bit > >>>>>>> too much. > >>>>>>> > >>>>>>> Since the vendor could also suffer from IOMMU-compatible problem, > >>>>>>> which means a local cache is always helpful, so I'd like to have the > >>>>>>> iova carried to the notifier. > >>>>>>> > >>>>>>> What'd you say? 
> >>>>>> > >>>>>> I agree, the pfn is not unique, multiple guest pfns (iovas) might be > >>>>>> backed by the same host pfn. DMA_UNMAP calls are based on iova, the > >>>>>> notifier through to the vendor driver must be based on the same. > >>>>> > >>>>> Host pfn should be unique, right? > >>>> > >>>> Let's say a user does a malloc of a single page and does 100 calls to > >>>> MAP_DMA populating 100 pages of IOVA space all backed by the same > >>>> malloc'd page. This is valid, I have unit tests that do essentially > >>>> this. Those will all have the same pfn. The user then does an > >>>> UNMAP_DMA to a single one of those IOVA pages. Did the user unmap > >>>> everything matching that pfn? Of course not, they only unmapped that > >>>> one IOVA page. There is no guarantee of a 1:1 mapping of pfn to IOVA. > >>>> UNMAP_DMA works based on IOVA. Invalidation broadcasts to the vendor > >>>> driver MUST therefore also work based on IOVA. This is not an academic > >>>> problem, address space aliases exist in real VMs, imagine a virtual > >>>> IOMMU. Thanks, > >>>> > >>> > >>> > >>> So struct vfio_iommu_type1_dma_unmap should be passed as argument to > >>> notifier callback: > >>> > >>> if (unmapped && iommu->external_domain) > >>> - vfio_notifier_call_chain(iommu, unmap); > >>> + blocking_notifier_call_chain(&iommu->notifier, > >>> + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > >>> + unmap); > >>> > >>> Then vendor driver should find pfns he has pinned from this range of > >>> iovas, then invalidate and unpin pfns. Right? > >> > >> That seems like a valid choice. It's probably better than calling the > >> notifier for each page of iova. Thanks, > >> > >> Alex > >> > > Hi Kirti, > > > > This version requires the *vendor driver* call vfio_register_notifier > > for an mdev device before any pinning operations. I guess all of the > > vendor drivers may have some alike code for notifier > > registration/unregistration. 
> > > > My question is, how about letting the mdev framework managing the > > notifier registration/unregistration process? > > > > We could add a notifier_fn_t callback to "struct parent_ops", then the > > mdev framework should make sure that the vendor driver assigned a value > > to this callback. The mdev core could initiate a notifier_block for each > > parent driver with its callback, and register/unregister it to vfio in > > the right time. > > > > Module mdev_core is independent of VFIO so far and it should be > independent of VFIO module. > > Its a good suggestion to have a notifier callback in parent_ops, but we > shouldn't call vfio_register_notifier()/ vfio_unregister_notifier() from > mdev core module. vfio_mdev module would take care to > register/unregister notifier to vfio from its vfio_mdev_open()/ > vfio_mdev_release() call. That looks cleaner to me. Nod. > > Notifier callback in parent_ops should be optional since all vendor > drivers might not pin/unpin pages, for example sample mtty driver. > Notifier would be registered to vfio module only if the notifier > callback is provided by vendor driver. > > If this looks reasonable, I'll have this patch in my next version of > patch series. Sounds reasonable for me. Thanks, > > Thanks, > Kirti >
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 28b50ca14c52..ff05ac6b1e90 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1891,6 +1891,79 @@ err_unpin_pages: } EXPORT_SYMBOL(vfio_unpin_pages); +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + ssize_t ret; + + if (!dev || !nb) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_register_nb; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->register_notifier)) + ret = driver->ops->register_notifier(container->iommu_data, nb); + else + ret = -EINVAL; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_register_nb: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_register_notifier); + +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) +{ + struct vfio_container *container; + struct vfio_group *group; + struct vfio_iommu_driver *driver; + ssize_t ret; + + if (!dev || !nb) + return -EINVAL; + + group = vfio_group_get_from_dev(dev); + if (IS_ERR(group)) + return PTR_ERR(group); + + ret = vfio_group_add_container_user(group); + if (ret) + goto err_unregister_nb; + + container = group->container; + down_read(&container->group_lock); + + driver = container->iommu_driver; + if (likely(driver && driver->ops->unregister_notifier)) + ret = driver->ops->unregister_notifier(container->iommu_data, + nb); + else + ret = -EINVAL; + + up_read(&container->group_lock); + vfio_group_try_dissolve_container(group); + +err_unregister_nb: + vfio_group_put(group); + return ret; +} +EXPORT_SYMBOL(vfio_unregister_notifier); + /** * Module/class support */ diff --git a/drivers/vfio/vfio_iommu_type1.c 
b/drivers/vfio/vfio_iommu_type1.c index 5add11a147e1..a4bd331ac0fd 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -37,6 +37,7 @@ #include <linux/vfio.h> #include <linux/workqueue.h> #include <linux/mdev.h> +#include <linux/notifier.h> #define DRIVER_VERSION "0.2" #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" @@ -59,6 +60,7 @@ struct vfio_iommu { struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; + struct blocking_notifier_head notifier; bool v2; bool nesting; }; @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); - if (!iommu->external_domain) { + /* Fail if notifier list is empty */ + if ((!iommu->external_domain) || (!iommu->notifier.head)) { ret = -EINVAL; goto pin_done; } @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) return bitmap; } +/* + * This function finds pfn in domain->external_addr_space->pfn_list for given + * iova range. If pfn exist, notify pfn to registered notifier list. On + * receiving notifier callback, vendor driver should invalidate the mapping and + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while + * searching for next vfio_pfn in rb tree, start search from first node again. + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed + * from rb tree and so in next search vfio_pfn would be same as previous + * vfio_pfn. In that case, exit from loop. 
+ */ +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, + struct vfio_iommu_type1_dma_unmap *unmap) +{ + struct vfio_domain *domain = iommu->external_domain; + struct rb_node *n; + struct vfio_pfn *vpfn = NULL, *prev_vpfn; + + do { + prev_vpfn = vpfn; + mutex_lock(&domain->external_addr_space->pfn_list_lock); + + n = rb_first(&domain->external_addr_space->pfn_list); + + for (; n; n = rb_next(n), vpfn = NULL) { + vpfn = rb_entry(n, struct vfio_pfn, node); + + if ((vpfn->iova >= unmap->iova) && + (vpfn->iova < unmap->iova + unmap->size)) + break; + } + + mutex_unlock(&domain->external_addr_space->pfn_list_lock); + + /* Notify any listeners about DMA_UNMAP */ + if (vpfn) + blocking_notifier_call_chain(&iommu->notifier, + VFIO_IOMMU_NOTIFY_DMA_UNMAP, + &vpfn->pfn); + } while (vpfn && (prev_vpfn != vpfn)); + + WARN_ON(vpfn); +} + static int vfio_dma_do_unmap(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap *unmap) { @@ -844,6 +891,9 @@ unlock: /* Report how much was unmapped */ unmap->size = unmapped; + if (unmapped && iommu->external_domain) + vfio_notifier_call_chain(iommu, unmap); + return ret; } @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->domain_list); iommu->dma_list = RB_ROOT; mutex_init(&iommu->lock); + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); return iommu; } @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, return -ENOTTY; } +static int vfio_iommu_type1_register_notifier(void *iommu_data, + struct notifier_block *nb) +{ + struct vfio_iommu *iommu = iommu_data; + + return blocking_notifier_chain_register(&iommu->notifier, nb); +} + +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, + struct notifier_block *nb) +{ + struct vfio_iommu *iommu = iommu_data; + + return blocking_notifier_chain_unregister(&iommu->notifier, nb); +} + static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { - .name = "vfio-iommu-type1", - .owner 
= THIS_MODULE, - .open = vfio_iommu_type1_open, - .release = vfio_iommu_type1_release, - .ioctl = vfio_iommu_type1_ioctl, - .attach_group = vfio_iommu_type1_attach_group, - .detach_group = vfio_iommu_type1_detach_group, - .pin_pages = vfio_iommu_type1_pin_pages, - .unpin_pages = vfio_iommu_type1_unpin_pages, + .name = "vfio-iommu-type1", + .owner = THIS_MODULE, + .open = vfio_iommu_type1_open, + .release = vfio_iommu_type1_release, + .ioctl = vfio_iommu_type1_ioctl, + .attach_group = vfio_iommu_type1_attach_group, + .detach_group = vfio_iommu_type1_detach_group, + .pin_pages = vfio_iommu_type1_pin_pages, + .unpin_pages = vfio_iommu_type1_unpin_pages, + .register_notifier = vfio_iommu_type1_register_notifier, + .unregister_notifier = vfio_iommu_type1_unregister_notifier, }; static int __init vfio_iommu_type1_init(void) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 0609a2052846..4c91ce8bfaeb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { unsigned long *phys_pfn); long (*unpin_pages)(void *iommu_data, unsigned long *pfn, long npage); + int (*register_notifier)(void *iommu_data, + struct notifier_block *nb); + int (*unregister_notifier)(void *iommu_data, + struct notifier_block *nb); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn, long npage); +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1 + +extern int vfio_register_notifier(struct device *dev, + struct notifier_block *nb); + +extern int vfio_unregister_notifier(struct device *dev, + struct notifier_block *nb); /* * IRQfd - generic */