Message ID | 20220509071426.155941-1-lulu@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Series | [v1] vdpa: Do not count the pages that were already pinned in the vhost-vDPA |
On Mon, May 9, 2022 at 3:15 PM Cindy Lu <lulu@redhat.com> wrote:
>
> We count pinned_vm as follows in vhost-vDPA:
>
>         lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>         if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
>                 ret = -ENOMEM;
>                 goto unlock;
>         }
>
> This means that if we have two vDPA devices for the same VM, the pages
> would be counted twice. So we add a tree to record the ranges that were
> already counted, and we will not count them again.
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
> ---
>  drivers/vhost/vdpa.c     | 79 ++++++++++++++++++++++++++++++++++++++--
>  include/linux/mm_types.h |  2 +
>  2 files changed, 78 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 05f5fd2af58f..48cb5c8264b5 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -24,6 +24,9 @@
>  #include <linux/vhost.h>
>
>  #include "vhost.h"
> +#include <linux/rbtree.h>
> +#include <linux/interval_tree.h>
> +#include <linux/interval_tree_generic.h>
>
>  enum {
>  	VHOST_VDPA_BACKEND_FEATURES =
> @@ -505,6 +508,50 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>  	mutex_unlock(&d->mutex);
>  	return r;
>  }
> +int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
> +{
> +	struct interval_tree_node *new_node;
> +
> +	if (last < start)
> +		return -EFAULT;
> +
> +	/* If the range being mapped is [0, ULONG_MAX], split it into two entries
> +	 * otherwise its size would overflow u64.
> +	 */
> +	if (start == 0 && last == ULONG_MAX) {
> +		u64 mid = last / 2;
> +
> +		vhost_vdpa_add_range_ctx(root, start, mid);
> +		start = mid + 1;
> +	}
> +
> +	new_node = kmalloc(sizeof(struct interval_tree_node), GFP_ATOMIC);
> +	if (!new_node)
> +		return -ENOMEM;
> +
> +	new_node->start = start;
> +	new_node->last = last;
> +
> +	interval_tree_insert(new_node, root);
> +
> +	return 0;
> +}
> +
> +void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
> +{
> +	struct interval_tree_node *new_node;
> +
> +	while ((new_node = interval_tree_iter_first(root, start, last))) {
> +		interval_tree_remove(new_node, root);
> +		kfree(new_node);
> +	}
> +}
> +
> +struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
> +						   u64 start, u64 last)
> +{
> +	return interval_tree_iter_first(root, start, last);
> +}
>
>  static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
>  {
> @@ -513,6 +560,7 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
>  	struct vhost_iotlb_map *map;
>  	struct page *page;
>  	unsigned long pfn, pinned;
> +	struct interval_tree_node *new_node = NULL;
>
>  	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
>  		pinned = PFN_DOWN(map->size);
> @@ -523,7 +571,18 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
>  			set_page_dirty_lock(page);
>  			unpin_user_page(page);
>  		}
> -		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
> +
> +		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
> +						   map->start,
> +						   map->start + map->size - 1);
> +
> +		if (new_node) {
> +			vhost_vdpa_del_range(&dev->mm->root_for_vdpa,
> +					     map->start,
> +					     map->start + map->size - 1);
> +			atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
> +		}
> +
>  		vhost_iotlb_map_free(iotlb, map);
>  	}
>  }
> @@ -591,6 +650,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
>  	struct vdpa_device *vdpa = v->vdpa;
>  	const struct vdpa_config_ops *ops = vdpa->config;
>  	int r = 0;
> +	struct interval_tree_node *new_node = NULL;
>
>  	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
>  				      pa, perm, opaque);
> @@ -611,9 +671,22 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
>  		return r;
>  	}
>
> -	if (!vdpa->use_va)
> -		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> +	if (!vdpa->use_va) {
> +		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
> +						   iova, iova + size - 1);
> +
> +		if (new_node == 0) {
> +			r = vhost_vdpa_add_range_ctx(&dev->mm->root_for_vdpa,
> +						     iova, iova + size - 1);
> +			if (r) {
> +				vhost_iotlb_del_range(dev->iotlb, iova,
> +						      iova + size - 1);
> +				return r;
> +			}
>
> +			atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> +		}

This doesn't seem sufficient, consider:

vhost-vDPA-A: add [A, B)
vhost-vDPA-B: add [A, C) (C > B)

We lose the accounting for [B, C)?

> +	}
>  	return 0;
>  }
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 5140e5feb486..46eaa6d0560b 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -634,6 +634,8 @@ struct mm_struct {
>  #ifdef CONFIG_IOMMU_SUPPORT
>  		u32 pasid;
>  #endif
> +		struct rb_root_cached root_for_vdpa;
> +

Let's avoid touching mm_struct unless it's a must. We can allocate
something like vhost_mm if needed during SET_OWNER.

Thanks

>  	} __randomize_layout;
>
>  	/*
> --
> 2.34.1
>
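The [B, C) hole described above comes from treating any overlap as "already counted". A minimal sketch of one way to close it is to charge only the bytes of the new range that no existing node covers. The helper below is hypothetical and not part of the posted patch; it assumes the tree stores only non-overlapping, already-merged ranges and that the caller serializes access to it.

/* Hypothetical helper, not from the posted patch: return how many bytes of
 * [start, last] are not yet covered by already-accounted ranges, so that
 * mapping [A, C) after [A, B) still charges the [B, C) part to pinned_vm.
 * Assumes non-overlapping entries, and that the caller splits a full
 * [0, ULONG_MAX] range the way vhost_vdpa_add_range_ctx() does, so the
 * size computation below cannot overflow.
 */
static u64 vhost_vdpa_uncharged_bytes(struct rb_root_cached *root,
				      u64 start, u64 last)
{
	struct interval_tree_node *node;
	u64 covered = 0;

	for (node = interval_tree_iter_first(root, start, last); node;
	     node = interval_tree_iter_next(node, start, last)) {
		u64 s = max_t(u64, node->start, start);
		u64 e = min_t(u64, node->last, last);

		covered += e - s + 1;
	}

	return last - start + 1 - covered;
}

The map path would then add PFN_DOWN() of the returned size to pinned_vm and insert or merge the new range; the unmap path needs the symmetric subtraction, since a node that only partially overlaps the unmapped region also breaks the current subtract-the-whole-map logic.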
Hi Cindy,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linux/master linus/master v5.18-rc6]
[cannot apply to next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/intel-lab-lkp/linux/commits/Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
base:     https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config:   s390-randconfig-r044-20220509 (https://download.01.org/0day-ci/archive/20220509/202205091928.dheTGNAt-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/4225cc2a756b75d1e0ff7ca2a593bada42def380
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
        git checkout 4225cc2a756b75d1e0ff7ca2a593bada42def380
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash drivers/vhost/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/vhost/vdpa.c:542:5: warning: no previous prototype for 'vhost_vdpa_add_range_ctx' [-Wmissing-prototypes]
     542 | int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
         |     ^~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/vhost/vdpa.c:571:6: warning: no previous prototype for 'vhost_vdpa_del_range' [-Wmissing-prototypes]
     571 | void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
         |      ^~~~~~~~~~~~~~~~~~~~
>> drivers/vhost/vdpa.c:581:28: warning: no previous prototype for 'vhost_vdpa_search_range' [-Wmissing-prototypes]
     581 | struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
         |                            ^~~~~~~~~~~~~~~~~~~~~~~

vim +/vhost_vdpa_add_range_ctx +542 drivers/vhost/vdpa.c

   464	
   465	static long vhost_vdpa_unlocked_ioctl(struct file *filep,
   466				unsigned int cmd, unsigned long arg)
   467	{
   468		struct vhost_vdpa *v = filep->private_data;
   469		struct vhost_dev *d = &v->vdev;
   470		void __user *argp = (void __user *)arg;
   471		u64 __user *featurep = argp;
   472		u64 features;
   473		long r = 0;
   474	
   475		if (cmd == VHOST_SET_BACKEND_FEATURES) {
   476			if (copy_from_user(&features, featurep, sizeof(features)))
   477				return -EFAULT;
   478			if (features & ~VHOST_VDPA_BACKEND_FEATURES)
   479				return -EOPNOTSUPP;
   480			vhost_set_backend_features(&v->vdev, features);
   481			return 0;
   482		}
   483	
   484		mutex_lock(&d->mutex);
   485	
   486		switch (cmd) {
   487		case VHOST_VDPA_GET_DEVICE_ID:
   488			r = vhost_vdpa_get_device_id(v, argp);
   489			break;
   490		case VHOST_VDPA_GET_STATUS:
   491			r = vhost_vdpa_get_status(v, argp);
   492			break;
   493		case VHOST_VDPA_SET_STATUS:
   494			r = vhost_vdpa_set_status(v, argp);
   495			break;
   496		case VHOST_VDPA_GET_CONFIG:
   497			r = vhost_vdpa_get_config(v, argp);
   498			break;
   499		case VHOST_VDPA_SET_CONFIG:
   500			r = vhost_vdpa_set_config(v, argp);
   501			break;
   502		case VHOST_GET_FEATURES:
   503			r = vhost_vdpa_get_features(v, argp);
   504			break;
   505		case VHOST_SET_FEATURES:
   506			r = vhost_vdpa_set_features(v, argp);
   507			break;
   508		case VHOST_VDPA_GET_VRING_NUM:
   509			r = vhost_vdpa_get_vring_num(v, argp);
   510			break;
   511		case VHOST_SET_LOG_BASE:
   512		case VHOST_SET_LOG_FD:
   513			r = -ENOIOCTLCMD;
   514			break;
   515		case VHOST_VDPA_SET_CONFIG_CALL:
   516			r = vhost_vdpa_set_config_call(v, argp);
   517			break;
   518		case VHOST_GET_BACKEND_FEATURES:
   519			features = VHOST_VDPA_BACKEND_FEATURES;
   520			if (copy_to_user(featurep, &features, sizeof(features)))
   521				r = -EFAULT;
   522			break;
   523		case VHOST_VDPA_GET_IOVA_RANGE:
   524			r = vhost_vdpa_get_iova_range(v, argp);
   525			break;
   526		case VHOST_VDPA_GET_CONFIG_SIZE:
   527			r = vhost_vdpa_get_config_size(v, argp);
   528			break;
   529		case VHOST_VDPA_GET_VQS_COUNT:
   530			r = vhost_vdpa_get_vqs_count(v, argp);
   531			break;
   532		default:
   533			r = vhost_dev_ioctl(&v->vdev, cmd, argp);
   534			if (r == -ENOIOCTLCMD)
   535				r = vhost_vdpa_vring_ioctl(v, cmd, argp);
   536			break;
   537		}
   538	
   539		mutex_unlock(&d->mutex);
   540		return r;
   541	}
 > 542	int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
   543	{
   544		struct interval_tree_node *new_node;
   545	
   546		if (last < start)
   547			return -EFAULT;
   548	
   549		/* If the range being mapped is [0, ULONG_MAX], split it into two entries
   550		 * otherwise its size would overflow u64.
   551		 */
   552		if (start == 0 && last == ULONG_MAX) {
   553			u64 mid = last / 2;
   554	
   555			vhost_vdpa_add_range_ctx(root, start, mid);
   556			start = mid + 1;
   557		}
   558	
   559		new_node = kmalloc(sizeof(struct interval_tree_node), GFP_ATOMIC);
   560		if (!new_node)
   561			return -ENOMEM;
   562	
   563		new_node->start = start;
   564		new_node->last = last;
   565	
   566		interval_tree_insert(new_node, root);
   567	
   568		return 0;
   569	}
   570	
 > 571	void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
   572	{
   573		struct interval_tree_node *new_node;
   574	
   575		while ((new_node = interval_tree_iter_first(root, start, last))) {
   576			interval_tree_remove(new_node, root);
   577			kfree(new_node);
   578		}
   579	}
   580	
 > 581	struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
   582							   u64 start, u64 last)
   583	{
   584		return interval_tree_iter_first(root, start, last);
   585	}
   586	
Hi Cindy,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linux/master linus/master v5.18-rc6]
[cannot apply to next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/intel-lab-lkp/linux/commits/Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
base:     https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config:   x86_64-randconfig-c007-20220509 (https://download.01.org/0day-ci/archive/20220509/202205092017.ywsnJTzp-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project a385645b470e2d3a1534aae618ea56b31177639f)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/4225cc2a756b75d1e0ff7ca2a593bada42def380
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
        git checkout 4225cc2a756b75d1e0ff7ca2a593bada42def380
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/vhost/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/vhost/vdpa.c:542:5: warning: no previous prototype for function 'vhost_vdpa_add_range_ctx' [-Wmissing-prototypes]
   int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
       ^
   drivers/vhost/vdpa.c:542:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
   ^
   static
>> drivers/vhost/vdpa.c:571:6: warning: no previous prototype for function 'vhost_vdpa_del_range' [-Wmissing-prototypes]
   void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
        ^
   drivers/vhost/vdpa.c:571:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
   ^
   static
>> drivers/vhost/vdpa.c:581:28: warning: no previous prototype for function 'vhost_vdpa_search_range' [-Wmissing-prototypes]
   struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
                              ^
   drivers/vhost/vdpa.c:581:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
   ^
   static
   3 warnings generated.
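Both robot reports flag the same thing, and the clang notes spell out the fix: the three helpers are only used inside vdpa.c, so their definitions can be made static. An illustrative sketch of the change, not a directly applicable hunk:

-int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
+static int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)

-void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
+static void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)

-struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
+static struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,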
Hi Cindy,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on mst-vhost/linux-next]
[also build test ERROR on linux/master linus/master v5.18-rc6]
[cannot apply to next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:      https://github.com/intel-lab-lkp/linux/commits/Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
base:     https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config:   x86_64-randconfig-a014-20220509 (https://download.01.org/0day-ci/archive/20220509/202205092058.if9wModg-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.2.0-20) 11.2.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/4225cc2a756b75d1e0ff7ca2a593bada42def380
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Cindy-Lu/vdpa-Do-not-count-the-pages-that-were-already-pinned-in-the-vhost-vDPA/20220509-152644
        git checkout 4225cc2a756b75d1e0ff7ca2a593bada42def380
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "interval_tree_remove" [drivers/vhost/vhost_vdpa.ko] undefined!
>> ERROR: modpost: "interval_tree_insert" [drivers/vhost/vhost_vdpa.ko] undefined!
>> ERROR: modpost: "interval_tree_iter_first" [drivers/vhost/vhost_vdpa.ko] undefined!
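These modpost failures are a build dependency rather than a code bug: the generic interval tree helpers in lib/ are only compiled when CONFIG_INTERVAL_TREE is enabled, and this randconfig has no other user selecting it. One possible fix, sketched below as an assumption rather than a tested hunk (the context lines of the Kconfig entry are approximate), is to have VHOST_VDPA select the symbol:

--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
 config VHOST_VDPA
 	tristate "Vhost driver for vDPA-based backend"
+	select INTERVAL_TREE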
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 05f5fd2af58f..48cb5c8264b5 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -24,6 +24,9 @@
 #include <linux/vhost.h>
 
 #include "vhost.h"
+#include <linux/rbtree.h>
+#include <linux/interval_tree.h>
+#include <linux/interval_tree_generic.h>
 
 enum {
 	VHOST_VDPA_BACKEND_FEATURES =
@@ -505,6 +508,50 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	mutex_unlock(&d->mutex);
 	return r;
 }
+int vhost_vdpa_add_range_ctx(struct rb_root_cached *root, u64 start, u64 last)
+{
+	struct interval_tree_node *new_node;
+
+	if (last < start)
+		return -EFAULT;
+
+	/* If the range being mapped is [0, ULONG_MAX], split it into two entries
+	 * otherwise its size would overflow u64.
+	 */
+	if (start == 0 && last == ULONG_MAX) {
+		u64 mid = last / 2;
+
+		vhost_vdpa_add_range_ctx(root, start, mid);
+		start = mid + 1;
+	}
+
+	new_node = kmalloc(sizeof(struct interval_tree_node), GFP_ATOMIC);
+	if (!new_node)
+		return -ENOMEM;
+
+	new_node->start = start;
+	new_node->last = last;
+
+	interval_tree_insert(new_node, root);
+
+	return 0;
+}
+
+void vhost_vdpa_del_range(struct rb_root_cached *root, u64 start, u64 last)
+{
+	struct interval_tree_node *new_node;
+
+	while ((new_node = interval_tree_iter_first(root, start, last))) {
+		interval_tree_remove(new_node, root);
+		kfree(new_node);
+	}
+}
+
+struct interval_tree_node *vhost_vdpa_search_range(struct rb_root_cached *root,
+						   u64 start, u64 last)
+{
+	return interval_tree_iter_first(root, start, last);
+}
 
 static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 {
@@ -513,6 +560,7 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 	struct vhost_iotlb_map *map;
 	struct page *page;
 	unsigned long pfn, pinned;
+	struct interval_tree_node *new_node = NULL;
 
 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 		pinned = PFN_DOWN(map->size);
@@ -523,7 +571,18 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 			set_page_dirty_lock(page);
 			unpin_user_page(page);
 		}
-		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+
+		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+						   map->start,
+						   map->start + map->size - 1);
+
+		if (new_node) {
+			vhost_vdpa_del_range(&dev->mm->root_for_vdpa,
+					     map->start,
+					     map->start + map->size - 1);
+			atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
+		}
+
 		vhost_iotlb_map_free(iotlb, map);
 	}
 }
@@ -591,6 +650,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	int r = 0;
+	struct interval_tree_node *new_node = NULL;
 
 	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
 				      pa, perm, opaque);
@@ -611,9 +671,22 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
 		return r;
 	}
 
-	if (!vdpa->use_va)
-		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+	if (!vdpa->use_va) {
+		new_node = vhost_vdpa_search_range(&dev->mm->root_for_vdpa,
+						   iova, iova + size - 1);
+
+		if (new_node == 0) {
+			r = vhost_vdpa_add_range_ctx(&dev->mm->root_for_vdpa,
+						     iova, iova + size - 1);
+			if (r) {
+				vhost_iotlb_del_range(dev->iotlb, iova,
+						      iova + size - 1);
+				return r;
+			}
+			atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+		}
+	}
 	return 0;
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5140e5feb486..46eaa6d0560b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -634,6 +634,8 @@ struct mm_struct {
 #ifdef CONFIG_IOMMU_SUPPORT
 		u32 pasid;
 #endif
+		struct rb_root_cached root_for_vdpa;
+
 	} __randomize_layout;
 
 	/*
We count pinned_vm as follows in vhost-vDPA:

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

This means that if we have two vDPA devices for the same VM, the pages
would be counted twice. So we add a tree to record the ranges that were
already counted, and we will not count them again.

Signed-off-by: Cindy Lu <lulu@redhat.com>
---
 drivers/vhost/vdpa.c     | 79 ++++++++++++++++++++++++++++++++++++++--
 include/linux/mm_types.h |  2 +
 2 files changed, 78 insertions(+), 3 deletions(-)
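The review above suggests keeping this bookkeeping out of mm_struct and instead allocating something like a vhost_mm when the device takes its owner (VHOST_SET_OWNER). A rough sketch of what such a per-owner object could look like follows; every name here is hypothetical and only meant to illustrate the shape, not an existing kernel interface:

/* Hypothetical per-owner accounting object (none of these names exist in
 * the kernel): looked up or created at VHOST_SET_OWNER time and dropped on
 * release, so several vhost-vDPA devices backed by the same mm can share
 * one range tree without extending mm_struct.
 */
struct vhost_mm {
	struct mm_struct *mm;		/* owner mm this accounting is for */
	struct rb_root_cached ranges;	/* ranges already charged to pinned_vm */
	struct mutex lock;		/* serializes charge/uncharge across devices */
	refcount_t users;		/* one reference per vhost-vDPA device */
	struct list_head link;		/* entry in a global list keyed by mm */
};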