Message ID | 20201210073425.25960-2-zhukeqian1@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio: iommu_type1: Some fixes and optimization | expand |
On Thu, 10 Dec 2020 15:34:19 +0800 Keqian Zhu <zhukeqian1@huawei.com> wrote: > Currently we do not clear added dirty bit of bitmap when unwind > pin, so if pin failed at halfway, we set unnecessary dirty bit > in bitmap. Clearing added dirty bit when unwind pin, userspace > will see less dirty page, which can save much time to handle them. > > Note that we should distinguish the bits added by pin and the bits > already set before pin, so introduce bitmap_added to record this. > > Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > --- > drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++----------- > 1 file changed, 22 insertions(+), 11 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 67e827638995..f129d24a6ec3 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > struct vfio_iommu *iommu = iommu_data; > struct vfio_group *group; > int i, j, ret; > + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); > unsigned long remote_vaddr; > + unsigned long bitmap_offset; > + unsigned long *bitmap_added; > + dma_addr_t iova; > struct vfio_dma *dma; > bool do_accounting; > > @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > > mutex_lock(&iommu->lock); > > + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL); > + if (!bitmap_added) { > + ret = -ENOMEM; > + goto pin_done; > + } This is allocated regardless of whether dirty tracking is enabled, so this adds overhead to the common case in order to reduce user overhead (not correctness) in the rare condition that dirty tracking is enabled, and the even rarer condition that there's a fault during that case. This is not a good trade-off. 
Thanks, Alex > + > /* Fail if notifier list is empty */ > if (!iommu->notifier.head) { > ret = -EINVAL; > @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); > > for (i = 0; i < npage; i++) { > - dma_addr_t iova; > struct vfio_pfn *vpfn; > > iova = user_pfn[i] << PAGE_SHIFT; > @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > } > > if (iommu->dirty_page_tracking) { > - unsigned long pgshift = __ffs(iommu->pgsize_bitmap); > - > - /* > - * Bitmap populated with the smallest supported page > - * size > - */ > - bitmap_set(dma->bitmap, > - (iova - dma->iova) >> pgshift, 1); > + /* Populated with the smallest supported page size */ > + bitmap_offset = (iova - dma->iova) >> pgshift; > + if (!test_and_set_bit(bitmap_offset, dma->bitmap)) > + set_bit(i, bitmap_added); > } > } > ret = i; > @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > pin_unwind: > phys_pfn[i] = 0; > for (j = 0; j < i; j++) { > - dma_addr_t iova; > - > iova = user_pfn[j] << PAGE_SHIFT; > dma = vfio_find_dma(iommu, iova, PAGE_SIZE); > vfio_unpin_page_external(dma, iova, do_accounting); > phys_pfn[j] = 0; > + > + if (test_bit(j, bitmap_added)) { > + bitmap_offset = (iova - dma->iova) >> pgshift; > + clear_bit(bitmap_offset, dma->bitmap); > + } > } > pin_done: > + if (bitmap_added) > + bitmap_free(bitmap_added); > + > mutex_unlock(&iommu->lock); > return ret; > }
On 2020/12/11 3:16, Alex Williamson wrote: > On Thu, 10 Dec 2020 15:34:19 +0800 > Keqian Zhu <zhukeqian1@huawei.com> wrote: > >> Currently we do not clear added dirty bit of bitmap when unwind >> pin, so if pin failed at halfway, we set unnecessary dirty bit >> in bitmap. Clearing added dirty bit when unwind pin, userspace >> will see less dirty page, which can save much time to handle them. >> >> Note that we should distinguish the bits added by pin and the bits >> already set before pin, so introduce bitmap_added to record this. >> >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> >> --- >> drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++----------- >> 1 file changed, 22 insertions(+), 11 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 67e827638995..f129d24a6ec3 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> struct vfio_iommu *iommu = iommu_data; >> struct vfio_group *group; >> int i, j, ret; >> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); >> unsigned long remote_vaddr; >> + unsigned long bitmap_offset; >> + unsigned long *bitmap_added; >> + dma_addr_t iova; >> struct vfio_dma *dma; >> bool do_accounting; >> >> @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> >> mutex_lock(&iommu->lock); >> >> + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL); >> + if (!bitmap_added) { >> + ret = -ENOMEM; >> + goto pin_done; >> + } > > > This is allocated regardless of whether dirty tracking is enabled, so > this adds overhead to the common case in order to reduce user overhead > (not correctness) in the rare condition that dirty tracking is enabled, > and the even rarer condition that there's a fault during that case. > This is not a good trade-off. Thanks, Hi Alex, We can allocate the bitmap when dirty tracking is active, do you accept this? 
Or we can set the dirty bitmap after all pins succeed, but this costs cpu time to locate vfio_dma with iova. Thanks, Keqian > > Alex > > >> + >> /* Fail if notifier list is empty */ >> if (!iommu->notifier.head) { >> ret = -EINVAL; >> @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); >> >> for (i = 0; i < npage; i++) { >> - dma_addr_t iova; >> struct vfio_pfn *vpfn; >> >> iova = user_pfn[i] << PAGE_SHIFT; >> @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> } >> >> if (iommu->dirty_page_tracking) { >> - unsigned long pgshift = __ffs(iommu->pgsize_bitmap); >> - >> - /* >> - * Bitmap populated with the smallest supported page >> - * size >> - */ >> - bitmap_set(dma->bitmap, >> - (iova - dma->iova) >> pgshift, 1); >> + /* Populated with the smallest supported page size */ >> + bitmap_offset = (iova - dma->iova) >> pgshift; >> + if (!test_and_set_bit(bitmap_offset, dma->bitmap)) >> + set_bit(i, bitmap_added); >> } >> } >> ret = i; >> @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> pin_unwind: >> phys_pfn[i] = 0; >> for (j = 0; j < i; j++) { >> - dma_addr_t iova; >> - >> iova = user_pfn[j] << PAGE_SHIFT; >> dma = vfio_find_dma(iommu, iova, PAGE_SIZE); >> vfio_unpin_page_external(dma, iova, do_accounting); >> phys_pfn[j] = 0; >> + >> + if (test_bit(j, bitmap_added)) { >> + bitmap_offset = (iova - dma->iova) >> pgshift; >> + clear_bit(bitmap_offset, dma->bitmap); >> + } >> } >> pin_done: >> + if (bitmap_added) >> + bitmap_free(bitmap_added); >> + >> mutex_unlock(&iommu->lock); >> return ret; >> } > > . >
On Fri, 11 Dec 2020 14:51:47 +0800 zhukeqian <zhukeqian1@huawei.com> wrote: > On 2020/12/11 3:16, Alex Williamson wrote: > > On Thu, 10 Dec 2020 15:34:19 +0800 > > Keqian Zhu <zhukeqian1@huawei.com> wrote: > > > >> Currently we do not clear added dirty bit of bitmap when unwind > >> pin, so if pin failed at halfway, we set unnecessary dirty bit > >> in bitmap. Clearing added dirty bit when unwind pin, userspace > >> will see less dirty page, which can save much time to handle them. > >> > >> Note that we should distinguish the bits added by pin and the bits > >> already set before pin, so introduce bitmap_added to record this. > >> > >> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> > >> --- > >> drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++----------- > >> 1 file changed, 22 insertions(+), 11 deletions(-) > >> > >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > >> index 67e827638995..f129d24a6ec3 100644 > >> --- a/drivers/vfio/vfio_iommu_type1.c > >> +++ b/drivers/vfio/vfio_iommu_type1.c > >> @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > >> struct vfio_iommu *iommu = iommu_data; > >> struct vfio_group *group; > >> int i, j, ret; > >> + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); > >> unsigned long remote_vaddr; > >> + unsigned long bitmap_offset; > >> + unsigned long *bitmap_added; > >> + dma_addr_t iova; > >> struct vfio_dma *dma; > >> bool do_accounting; > >> > >> @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > >> > >> mutex_lock(&iommu->lock); > >> > >> + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL); > >> + if (!bitmap_added) { > >> + ret = -ENOMEM; > >> + goto pin_done; > >> + } > > > > > > This is allocated regardless of whether dirty tracking is enabled, so > > this adds overhead to the common case in order to reduce user overhead > > (not correctness) in the rare condition that dirty tracking is enabled, > > and the even rarer 
condition that there's a fault during that case. > > This is not a good trade-off. Thanks, > > Hi Alex, > > We can allocate the bitmap when dirty tracking is active, do you accept this? > Or we can set the dirty bitmap after all pins succeed, but this costs cpu time > to locate vfio_dma with iova. TBH I don't see this as a terribly significant problem, in the rare event of an error with dirty tracking enabled, the user might see some pages marked dirty that were not successfully pinned by the mdev vendor driver. The solution shouldn't impose more overhead than the original issue. Thanks, Alex > >> + > >> /* Fail if notifier list is empty */ > >> if (!iommu->notifier.head) { > >> ret = -EINVAL; > >> @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > >> do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); > >> > >> for (i = 0; i < npage; i++) { > >> - dma_addr_t iova; > >> struct vfio_pfn *vpfn; > >> > >> iova = user_pfn[i] << PAGE_SHIFT; > >> @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > >> } > >> > >> if (iommu->dirty_page_tracking) { > >> - unsigned long pgshift = __ffs(iommu->pgsize_bitmap); > >> - > >> - /* > >> - * Bitmap populated with the smallest supported page > >> - * size > >> - */ > >> - bitmap_set(dma->bitmap, > >> - (iova - dma->iova) >> pgshift, 1); > >> + /* Populated with the smallest supported page size */ > >> + bitmap_offset = (iova - dma->iova) >> pgshift; > >> + if (!test_and_set_bit(bitmap_offset, dma->bitmap)) > >> + set_bit(i, bitmap_added); > >> } > >> } > >> ret = i; > >> @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > >> pin_unwind: > >> phys_pfn[i] = 0; > >> for (j = 0; j < i; j++) { > >> - dma_addr_t iova; > >> - > >> iova = user_pfn[j] << PAGE_SHIFT; > >> dma = vfio_find_dma(iommu, iova, PAGE_SIZE); > >> vfio_unpin_page_external(dma, iova, do_accounting); > >> phys_pfn[j] = 0; > >> + > >> + if (test_bit(j, bitmap_added)) { > >> + 
bitmap_offset = (iova - dma->iova) >> pgshift; > >> + clear_bit(bitmap_offset, dma->bitmap); > >> + } > >> } > >> pin_done: > >> + if (bitmap_added) > >> + bitmap_free(bitmap_added); > >> + > >> mutex_unlock(&iommu->lock); > >> return ret; > >> } > > > > . > > >
Hi Keqian, url: https://github.com/0day-ci/linux/commits/Keqian-Zhu/vfio-iommu_type1-Some-fixes-and-optimization/20201210-154322 base: https://github.com/awilliam/linux-vfio.git next config: x86_64-randconfig-m001-20201215 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> smatch warnings: drivers/vfio/vfio_iommu_type1.c:648 vfio_iommu_type1_pin_pages() warn: variable dereferenced before check 'iommu' (see line 640) vim +/iommu +648 drivers/vfio/vfio_iommu_type1.c a54eb55045ae9b3 Kirti Wankhede 2016-11-17 631 static int vfio_iommu_type1_pin_pages(void *iommu_data, 95fc87b44104d9a Kirti Wankhede 2020-05-29 632 struct iommu_group *iommu_group, a54eb55045ae9b3 Kirti Wankhede 2016-11-17 633 unsigned long *user_pfn, a54eb55045ae9b3 Kirti Wankhede 2016-11-17 634 int npage, int prot, a54eb55045ae9b3 Kirti Wankhede 2016-11-17 635 unsigned long *phys_pfn) a54eb55045ae9b3 Kirti Wankhede 2016-11-17 636 { a54eb55045ae9b3 Kirti Wankhede 2016-11-17 637 struct vfio_iommu *iommu = iommu_data; 95fc87b44104d9a Kirti Wankhede 2020-05-29 638 struct vfio_group *group; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 639 int i, j, ret; 2b172c0ea2a6daf Keqian Zhu 2020-12-10 @640 unsigned long pgshift = __ffs(iommu->pgsize_bitmap); ^^^^^^^^^^^^^^^^^^^^ The patch introduces a new dereference. 
a54eb55045ae9b3 Kirti Wankhede 2016-11-17 641 unsigned long remote_vaddr; 2b172c0ea2a6daf Keqian Zhu 2020-12-10 642 unsigned long bitmap_offset; 2b172c0ea2a6daf Keqian Zhu 2020-12-10 643 unsigned long *bitmap_added; 2b172c0ea2a6daf Keqian Zhu 2020-12-10 644 dma_addr_t iova; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 645 struct vfio_dma *dma; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 646 bool do_accounting; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 647 a54eb55045ae9b3 Kirti Wankhede 2016-11-17 @648 if (!iommu || !user_pfn || !phys_pfn) ^^^^^^ Checked too late. a54eb55045ae9b3 Kirti Wankhede 2016-11-17 649 return -EINVAL; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 650 a54eb55045ae9b3 Kirti Wankhede 2016-11-17 651 /* Supported for v2 version only */ a54eb55045ae9b3 Kirti Wankhede 2016-11-17 652 if (!iommu->v2) a54eb55045ae9b3 Kirti Wankhede 2016-11-17 653 return -EACCES; a54eb55045ae9b3 Kirti Wankhede 2016-11-17 654 a54eb55045ae9b3 Kirti Wankhede 2016-11-17 655 mutex_lock(&iommu->lock); a54eb55045ae9b3 Kirti Wankhede 2016-11-17 656 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 67e827638995..f129d24a6ec3 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_group *group; int i, j, ret; + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); unsigned long remote_vaddr; + unsigned long bitmap_offset; + unsigned long *bitmap_added; + dma_addr_t iova; struct vfio_dma *dma; bool do_accounting; @@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); + bitmap_added = bitmap_zalloc(npage, GFP_KERNEL); + if (!bitmap_added) { + ret = -ENOMEM; + goto pin_done; + } + /* Fail if notifier list is empty */ if (!iommu->notifier.head) { ret = -EINVAL; @@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); for (i = 0; i < npage; i++) { - dma_addr_t iova; struct vfio_pfn *vpfn; iova = user_pfn[i] << PAGE_SHIFT; @@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, } if (iommu->dirty_page_tracking) { - unsigned long pgshift = __ffs(iommu->pgsize_bitmap); - - /* - * Bitmap populated with the smallest supported page - * size - */ - bitmap_set(dma->bitmap, - (iova - dma->iova) >> pgshift, 1); + /* Populated with the smallest supported page size */ + bitmap_offset = (iova - dma->iova) >> pgshift; + if (!test_and_set_bit(bitmap_offset, dma->bitmap)) + set_bit(i, bitmap_added); } } ret = i; @@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, pin_unwind: phys_pfn[i] = 0; for (j = 0; j < i; j++) { - dma_addr_t iova; - iova = user_pfn[j] << PAGE_SHIFT; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); vfio_unpin_page_external(dma, iova, do_accounting); phys_pfn[j] = 0; + + if (test_bit(j, bitmap_added)) { + bitmap_offset = (iova - dma->iova) >> pgshift; + 
clear_bit(bitmap_offset, dma->bitmap); + } } pin_done: + if (bitmap_added) + bitmap_free(bitmap_added); + mutex_unlock(&iommu->lock); return ret; }
Currently we do not clear the dirty bits added to the bitmap when we unwind a pin, so if pinning fails halfway through, unnecessary dirty bits are left set in the bitmap. By clearing the added dirty bits when unwinding the pin, userspace will see fewer dirty pages, which can save much time otherwise spent handling them. Note that we should distinguish the bits added by this pin operation from the bits already set before the pin, so introduce bitmap_added to record this. Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com> --- drivers/vfio/vfio_iommu_type1.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-)