Message ID | d07f07cdd545ab1a495a9a0da06e43ad97c069a2.1632171479.git.maciej.szmigiero@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Series | KVM: Scalable memslots implementation |
On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> There is no point in recalculating from scratch the total number of pages
> in all memslots each time a memslot is created or deleted.
>
> Just cache the value and update it accordingly on each such operation so
> the code doesn't need to traverse the whole memslot array each time.
>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 28ef14155726..65fdf27b9423 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -11609,9 +11609,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  				const struct kvm_memory_slot *new,
>  				enum kvm_mr_change change)
>  {
> -	if (!kvm->arch.n_requested_mmu_pages)
> -		kvm_mmu_change_mmu_pages(kvm,
> -				kvm_mmu_calculate_default_mmu_pages(kvm));
> +	if (change == KVM_MR_CREATE)
> +		kvm->arch.n_memslots_pages += new->npages;
> +	else if (change == KVM_MR_DELETE) {
> +		WARN_ON(kvm->arch.n_memslots_pages < old->npages);
> +		kvm->arch.n_memslots_pages -= old->npages;
> +	}
> +
> +	if (!kvm->arch.n_requested_mmu_pages) {

Hmm, once n_requested_mmu_pages is set it can't be unset.  That means this can be
further optimized to avoid taking mmu_lock on flags-only changes (and
memslot movement).  E.g.

	if (!kvm->arch.n_requested_mmu_pages &&
	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {

	}

It's a little risky, but kvm_vm_ioctl_set_nr_mmu_pages() would need to be modified
to allow clearing n_requested_mmu_pages and it already takes slots_lock, so IMO
it's ok to force kvm_vm_ioctl_set_nr_mmu_pages() to recalculate pages if it wants
to allow reverting back to the default.

> +		u64 memslots_pages;
> +		unsigned long nr_mmu_pages;
> +
> +		memslots_pages = kvm->arch.n_memslots_pages * KVM_PERMILLE_MMU_PAGES;
> +		do_div(memslots_pages, 1000);
> +		nr_mmu_pages = max_t(typeof(nr_mmu_pages),
> +				     memslots_pages, KVM_MIN_ALLOC_MMU_PAGES);

"memslots_pages" is a bit of a misnomer.  Any objection to avoiding naming problems
by explicitly casting to an "unsigned long" and simply operating on nr_mmu_pages?

		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
		nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);

E.g. the whole thing can be

	if (!kvm->arch.n_requested_mmu_pages &&
	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
		unsigned long nr_mmu_pages;

		if (change == KVM_MR_CREATE) {
			kvm->arch.n_memslots_pages += new->npages;
		} else {
			WARN_ON(kvm->arch.n_memslots_pages < old->npages);
			kvm->arch.n_memslots_pages -= old->npages;
		}

		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
		nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
	}

> +		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
> +	}
>
>  	kvm_mmu_slot_apply_flags(kvm, old, new, change);
>
On Tue, Oct 19, 2021, Sean Christopherson wrote:
> On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
> > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> >
> > There is no point in recalculating from scratch the total number of pages
> > in all memslots each time a memslot is created or deleted.
> >
> > Just cache the value and update it accordingly on each such operation so
> > the code doesn't need to traverse the whole memslot array each time.
> >
> > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> > ---
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 28ef14155726..65fdf27b9423 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -11609,9 +11609,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
> >  				const struct kvm_memory_slot *new,
> >  				enum kvm_mr_change change)
> >  {
> > -	if (!kvm->arch.n_requested_mmu_pages)
> > -		kvm_mmu_change_mmu_pages(kvm,
> > -				kvm_mmu_calculate_default_mmu_pages(kvm));
> > +	if (change == KVM_MR_CREATE)
> > +		kvm->arch.n_memslots_pages += new->npages;
> > +	else if (change == KVM_MR_DELETE) {
> > +		WARN_ON(kvm->arch.n_memslots_pages < old->npages);
> > +		kvm->arch.n_memslots_pages -= old->npages;
> > +	}
> > +
> > +	if (!kvm->arch.n_requested_mmu_pages) {
>
> Hmm, once n_requested_mmu_pages is set it can't be unset.  That means this can be
> further optimized to avoid taking mmu_lock on flags-only changes (and
> memslot movement).  E.g.
>
> 	if (!kvm->arch.n_requested_mmu_pages &&
> 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
>
> 	}
>
> It's a little risky, but kvm_vm_ioctl_set_nr_mmu_pages() would need to be modified
> to allow clearing n_requested_mmu_pages and it already takes slots_lock, so IMO
> it's ok to force kvm_vm_ioctl_set_nr_mmu_pages() to recalculate pages if it wants
> to allow reverting back to the default.

Doh, and then I read patch 2...

I would swap the order of patch 2 and patch 1, that way the optimization patch is
super simple, and you don't end up reworking a bunch of code that was added in the
immediately preceding patch.  E.g. as a first patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 28ef14155726..f3b1aed08566 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11609,7 +11609,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
-	if (!kvm->arch.n_requested_mmu_pages)
+	if (!kvm->arch.n_requested_mmu_pages &&
+	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE))
 		kvm_mmu_change_mmu_pages(kvm,
 				kvm_mmu_calculate_default_mmu_pages(kvm));
On 20.10.2021 00:31, Sean Christopherson wrote:
> On Tue, Oct 19, 2021, Sean Christopherson wrote:
>> On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>
>>> There is no point in recalculating from scratch the total number of pages
>>> in all memslots each time a memslot is created or deleted.
>>>
>>> Just cache the value and update it accordingly on each such operation so
>>> the code doesn't need to traverse the whole memslot array each time.
>>>
>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>> ---
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index 28ef14155726..65fdf27b9423 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -11609,9 +11609,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>>>  				const struct kvm_memory_slot *new,
>>>  				enum kvm_mr_change change)
>>>  {
>>> -	if (!kvm->arch.n_requested_mmu_pages)
>>> -		kvm_mmu_change_mmu_pages(kvm,
>>> -				kvm_mmu_calculate_default_mmu_pages(kvm));
>>> +	if (change == KVM_MR_CREATE)
>>> +		kvm->arch.n_memslots_pages += new->npages;
>>> +	else if (change == KVM_MR_DELETE) {
>>> +		WARN_ON(kvm->arch.n_memslots_pages < old->npages);
>>> +		kvm->arch.n_memslots_pages -= old->npages;
>>> +	}
>>> +
>>> +	if (!kvm->arch.n_requested_mmu_pages) {
>>
>> Hmm, once n_requested_mmu_pages is set it can't be unset.  That means this can be
>> further optimized to avoid taking mmu_lock on flags-only changes (and
>> memslot movement).  E.g.
>>
>> 	if (!kvm->arch.n_requested_mmu_pages &&
>> 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
>>
>> 	}
>>
>> It's a little risky, but kvm_vm_ioctl_set_nr_mmu_pages() would need to be modified
>> to allow clearing n_requested_mmu_pages and it already takes slots_lock, so IMO
>> it's ok to force kvm_vm_ioctl_set_nr_mmu_pages() to recalculate pages if it wants
>> to allow reverting back to the default.
>
> Doh, and then I read patch 2...
>
> I would swap the order of patch 2 and patch 1, that way the optimization patch is
> super simple, and you don't end up reworking a bunch of code that was added in the
> immediately preceding patch.  E.g. as a first patch
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 28ef14155726..f3b1aed08566 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -11609,7 +11609,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  				const struct kvm_memory_slot *new,
>  				enum kvm_mr_change change)
>  {
> -	if (!kvm->arch.n_requested_mmu_pages)
> +	if (!kvm->arch.n_requested_mmu_pages &&
> +	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE))
>  		kvm_mmu_change_mmu_pages(kvm,
>  				kvm_mmu_calculate_default_mmu_pages(kvm));
>
>

Will do.

Thanks,
Maciej
On 20.10.2021 00:24, Sean Christopherson wrote:
> On Mon, Sep 20, 2021, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> There is no point in recalculating from scratch the total number of pages
>> in all memslots each time a memslot is created or deleted.
>>
>> Just cache the value and update it accordingly on each such operation so
>> the code doesn't need to traverse the whole memslot array each time.
>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 28ef14155726..65fdf27b9423 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -11609,9 +11609,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>>  				const struct kvm_memory_slot *new,
>>  				enum kvm_mr_change change)
>>  {
>> -	if (!kvm->arch.n_requested_mmu_pages)
>> -		kvm_mmu_change_mmu_pages(kvm,
>> -				kvm_mmu_calculate_default_mmu_pages(kvm));
>> +	if (change == KVM_MR_CREATE)
>> +		kvm->arch.n_memslots_pages += new->npages;
>> +	else if (change == KVM_MR_DELETE) {
>> +		WARN_ON(kvm->arch.n_memslots_pages < old->npages);
>> +		kvm->arch.n_memslots_pages -= old->npages;
>> +	}
>> +
>> +	if (!kvm->arch.n_requested_mmu_pages) {
(..)
>> +		u64 memslots_pages;
>> +		unsigned long nr_mmu_pages;
>> +
>> +		memslots_pages = kvm->arch.n_memslots_pages * KVM_PERMILLE_MMU_PAGES;
>> +		do_div(memslots_pages, 1000);
>> +		nr_mmu_pages = max_t(typeof(nr_mmu_pages),
>> +				     memslots_pages, KVM_MIN_ALLOC_MMU_PAGES);
>
> "memslots_pages" is a bit of a misnomer.  Any objection to avoiding naming problems
> by explicitly casting to an "unsigned long" and simply operating on nr_mmu_pages?
>
> 		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
> 		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
> 		nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
> 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
>
> E.g. the whole thing can be
>
> 	if (!kvm->arch.n_requested_mmu_pages &&
> 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
> 		unsigned long nr_mmu_pages;
>
> 		if (change == KVM_MR_CREATE) {
> 			kvm->arch.n_memslots_pages += new->npages;
> 		} else {
> 			WARN_ON(kvm->arch.n_memslots_pages < old->npages);
> 			kvm->arch.n_memslots_pages -= old->npages;
> 		}
>
> 		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
> 		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);

The above line will set nr_mmu_pages to zero since KVM_PERMILLE_MMU_PAGES
is 20, which, when integer-divided by 1000, results in a multiplication
coefficient of zero.

> 		nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
> 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
> 	}
>
>> +		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
>> +	}
>>
>>  	kvm_mmu_slot_apply_flags(kvm, old, new, change);
>>

Thanks,
Maciej
On Wed, Oct 20, 2021, Maciej S. Szmigiero wrote:
> On 20.10.2021 00:24, Sean Christopherson wrote:
> > E.g. the whole thing can be
> >
> > 	if (!kvm->arch.n_requested_mmu_pages &&
> > 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
> > 		unsigned long nr_mmu_pages;
> >
> > 		if (change == KVM_MR_CREATE) {
> > 			kvm->arch.n_memslots_pages += new->npages;
> > 		} else {
> > 			WARN_ON(kvm->arch.n_memslots_pages < old->npages);
> > 			kvm->arch.n_memslots_pages -= old->npages;
> > 		}
> >
> > 		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
> > 		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
>
> The above line will set nr_mmu_pages to zero since KVM_PERMILLE_MMU_PAGES
> is 20, which, when integer-divided by 1000, results in a multiplication
> coefficient of zero.

Ugh, math.  And thus do_div() to avoid the whole 64-bit divide issue on 32-bit KVM.
Bummer.
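To make the ordering issue above concrete: the following is a minimal standalone C sketch, not kernel code; the page count is hypothetical and plain 64-bit division stands in for the kernel's do_div() helper. It contrasts the two evaluation orders being discussed.

	#include <stdio.h>
	#include <stdint.h>

	#define KVM_PERMILLE_MMU_PAGES	20	/* mirrors the kernel constant */

	int main(void)
	{
		uint64_t n_memslots_pages = 1048576;	/* hypothetical: a 4 GiB guest */

		/* Divide-before-multiply: 20 / 1000 == 0 in integer math, so the product is 0. */
		uint64_t broken  = n_memslots_pages * (KVM_PERMILLE_MMU_PAGES / 1000);

		/* Multiply-before-divide: the ordering the do_div()-based patch code uses. */
		uint64_t working = n_memslots_pages * KVM_PERMILLE_MMU_PAGES / 1000;

		printf("broken = %llu, working = %llu\n",
		       (unsigned long long)broken, (unsigned long long)working);
		return 0;
	}

Dividing the 20-permille constant by 1000 first truncates to zero, so every memslot total maps to zero MMU pages; multiplying first and only then dividing, which is what the patch's do_div() call performs on the u64 product, yields the intended 2% of the memslot pages.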
On Wed, Oct 20, 2021, Sean Christopherson wrote:
> On Wed, Oct 20, 2021, Maciej S. Szmigiero wrote:
> > On 20.10.2021 00:24, Sean Christopherson wrote:
> > > E.g. the whole thing can be
> > >
> > > 	if (!kvm->arch.n_requested_mmu_pages &&
> > > 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
> > > 		unsigned long nr_mmu_pages;
> > >
> > > 		if (change == KVM_MR_CREATE) {
> > > 			kvm->arch.n_memslots_pages += new->npages;
> > > 		} else {
> > > 			WARN_ON(kvm->arch.n_memslots_pages < old->npages);
> > > 			kvm->arch.n_memslots_pages -= old->npages;
> > > 		}
> > >
> > > 		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
> > > 		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
> >
> > The above line will set nr_mmu_pages to zero since KVM_PERMILLE_MMU_PAGES
> > is 20, which, when integer-divided by 1000, results in a multiplication
> > coefficient of zero.
>
> Ugh, math.  And thus do_div() to avoid the whole 64-bit divide issue on 32-bit KVM.
> Bummer.

I was revisiting this today because (a) simply making n_memslots_pages a u64 doesn't
cleanly handle the case where the resulting nr_mmu_pages would wrap, (b) any fix
in that area should really go in a separate patch to fix
kvm_mmu_calculate_default_mmu_pages() and then carry that behavior forward.

But as I dove deeper (and deeper), I came to the conclusion that supporting a
total number of memslot pages that doesn't fit in an unsigned long is a waste of
our time.  With a 32-bit kernel, userspace can at most address 3gb of virtual
memory, whereas wrapping the total number of pages would require 4tb+ of guest
physical memory.  Even with x86's second address space for SMM, that means userspace
would need to alias all of guest memory more than one _thousand_ times.  And on
older hardware with MAXPHYADDR < 43, the guest couldn't actually access any of those
aliases even if userspace lied about guest.MAXPHYADDR.

So unless I'm missing something, or PPC or MIPS has some crazy way for a 32-bit
host to support 4TB of guest memory, my vote would be to explicitly disallow
creating more memslot pages than can fit in an unsigned long.  Then x86 KVM could
reuse the cached nr_memslot_pages and x86's MMU wouldn't have to update a big pile
of code to support a scenario that practically speaking is useless.

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 72b329e82089..acabdbdef5cf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -552,6 +552,7 @@ struct kvm {
 	 */
 	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
+	unsigned long nr_memslot_pages;
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8bf4b89cfa03..c63fc5c05322 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1567,6 +1567,15 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 				     const struct kvm_memory_slot *new,
 				     enum kvm_mr_change change)
 {
+	/*
+	 * Update the total number of memslot pages before calling the arch
+	 * hook so that architectures can consume the result directly.
+	 */
+	if (change == KVM_MR_DELETE)
+		kvm->nr_memslot_pages -= old->npages;
+	else if (change == KVM_MR_CREATE)
+		kvm->nr_memslot_pages += new->npages;
+
 	kvm_arch_commit_memory_region(kvm, old, new, change);

 	/*
@@ -1738,6 +1747,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		if (!old || !old->npages)
 			return -EINVAL;

+		if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
+			return -EIO;
+
 		memset(&new, 0, sizeof(new));
 		new.id = id;
 		new.as_id = as_id;
@@ -1756,6 +1768,13 @@ int __kvm_set_memory_region(struct kvm *kvm,

 	if (!old || !old->npages) {
 		change = KVM_MR_CREATE;
+
+		/*
+		 * To simplify KVM internals, the total number of pages across
+		 * all memslots must fit in an unsigned long.
+		 */
+		if ((kvm->nr_memslot_pages + new.npages) < kvm->nr_memslot_pages)
+			return -EINVAL;
 	} else { /* Modify an existing slot. */
 		if ((new.userspace_addr != old->userspace_addr) ||
 		    (new.npages != old->npages) ||
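A side note on the check in the last hunk above: the "(kvm->nr_memslot_pages + new.npages) < kvm->nr_memslot_pages" comparison works because unsigned addition in C wraps modulo 2^N, so a wrapped sum compares smaller than either operand. A small standalone C sketch of the same idiom, using hypothetical values rather than KVM code:

	#include <stdio.h>
	#include <limits.h>

	/* Returns 0 on success, -1 if adding npages would wrap the running total. */
	static int try_add_pages(unsigned long *total, unsigned long npages)
	{
		/* Unsigned overflow wraps, so a wrapped sum is smaller than either operand. */
		if (*total + npages < *total)
			return -1;

		*total += npages;
		return 0;
	}

	int main(void)
	{
		unsigned long total = ULONG_MAX - 100;	/* contrived: already near the limit */

		printf("small slot: %d\n", try_add_pages(&total, 50));	/* fits  -> 0  */
		printf("huge slot:  %d\n", try_add_pages(&total, 200));	/* wraps -> -1 */
		return 0;
	}

This is the same pattern the proposed __kvm_set_memory_region() hunk uses to reject a CREATE that would push the running page count past what an unsigned long can represent, without needing a wider type.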
On 01.11.2021 23:29, Sean Christopherson wrote:
> On Wed, Oct 20, 2021, Sean Christopherson wrote:
>> On Wed, Oct 20, 2021, Maciej S. Szmigiero wrote:
>>> On 20.10.2021 00:24, Sean Christopherson wrote:
>>>> E.g. the whole thing can be
>>>>
>>>> 	if (!kvm->arch.n_requested_mmu_pages &&
>>>> 	    (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
>>>> 		unsigned long nr_mmu_pages;
>>>>
>>>> 		if (change == KVM_MR_CREATE) {
>>>> 			kvm->arch.n_memslots_pages += new->npages;
>>>> 		} else {
>>>> 			WARN_ON(kvm->arch.n_memslots_pages < old->npages);
>>>> 			kvm->arch.n_memslots_pages -= old->npages;
>>>> 		}
>>>>
>>>> 		nr_mmu_pages = (unsigned long)kvm->arch.n_memslots_pages;
>>>> 		nr_mmu_pages *= (KVM_PERMILLE_MMU_PAGES / 1000);
>>>
>>> The above line will set nr_mmu_pages to zero since KVM_PERMILLE_MMU_PAGES
>>> is 20, which, when integer-divided by 1000, results in a multiplication
>>> coefficient of zero.
>>
>> Ugh, math.  And thus do_div() to avoid the whole 64-bit divide issue on 32-bit KVM.
>> Bummer.
>
> I was revisiting this today because (a) simply making n_memslots_pages a u64 doesn't
> cleanly handle the case where the resulting nr_mmu_pages would wrap,

Handling this case without capping total n_memslots_pages would require
either capping memslots_pages on 32-bit KVM to make it fit in 32 bits or
changing kvm_mmu_change_mmu_pages() and all the logic further down to
accept u64's.

> (b) any fix
> in that area should really go in a separate patch to fix
> kvm_mmu_calculate_default_mmu_pages() and then carry that behavior forward.
>
> But as I dove deeper (and deeper), I came to the conclusion that supporting a
> total number of memslot pages that doesn't fit in an unsigned long is a waste of
> our time.  With a 32-bit kernel, userspace can at most address 3gb of virtual
> memory, whereas wrapping the total number of pages would require 4tb+ of guest
> physical memory.  Even with x86's second address space for SMM, that means userspace
> would need to alias all of guest memory more than one _thousand_ times.  And on
> older hardware with MAXPHYADDR < 43, the guest couldn't actually access any of those
> aliases even if userspace lied about guest.MAXPHYADDR.
>
> So unless I'm missing something, or PPC or MIPS has some crazy way for a 32-bit
> host to support 4TB of guest memory, my vote would be to explicitly disallow
> creating more memslot pages than can fit in an unsigned long.  Then x86 KVM could
> reuse the cached nr_memslot_pages and x86's MMU wouldn't have to update a big pile
> of code to support a scenario that practically speaking is useless.
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 72b329e82089..acabdbdef5cf 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -552,6 +552,7 @@ struct kvm {
>  	 */
>  	struct mutex slots_arch_lock;
>  	struct mm_struct *mm; /* userspace tied to this vm */
> +	unsigned long nr_memslot_pages;
>  	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
>  	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 8bf4b89cfa03..c63fc5c05322 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1567,6 +1567,15 @@ static void kvm_commit_memory_region(struct kvm *kvm,
>  				     const struct kvm_memory_slot *new,
>  				     enum kvm_mr_change change)
>  {
> +	/*
> +	 * Update the total number of memslot pages before calling the arch
> +	 * hook so that architectures can consume the result directly.
> +	 */
> +	if (change == KVM_MR_DELETE)
> +		kvm->nr_memslot_pages -= old->npages;
> +	else if (change == KVM_MR_CREATE)
> +		kvm->nr_memslot_pages += new->npages;
> +
>  	kvm_arch_commit_memory_region(kvm, old, new, change);
>
>  	/*
> @@ -1738,6 +1747,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  		if (!old || !old->npages)
>  			return -EINVAL;
>
> +		if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
> +			return -EIO;
> +
>  		memset(&new, 0, sizeof(new));
>  		new.id = id;
>  		new.as_id = as_id;
> @@ -1756,6 +1768,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
>
>  	if (!old || !old->npages) {
>  		change = KVM_MR_CREATE;
> +
> +		/*
> +		 * To simplify KVM internals, the total number of pages across
> +		 * all memslots must fit in an unsigned long.
> +		 */
> +		if ((kvm->nr_memslot_pages + new.npages) < kvm->nr_memslot_pages)
> +			return -EINVAL;
>  	} else { /* Modify an existing slot. */
>  		if ((new.userspace_addr != old->userspace_addr) ||
>  		    (new.npages != old->npages) ||
>

Capping total n_memslots_pages makes sense to me to avoid the (existing)
nr_mmu_pages wraparound issue, will update the next patchset version
accordingly.

Thanks,
Maciej
On Wed, Nov 03, 2021, Maciej S. Szmigiero wrote:
> Capping total n_memslots_pages makes sense to me to avoid the (existing)
> nr_mmu_pages wraparound issue, will update the next patchset version
> accordingly.

No need to do it yourself.  I have a reworked version of the series with a bunch
of cleanups before and after the meat of your series, as well as non-functional
changes (hopefully) to the "Resolve memslot ID via a hash table" and "Keep memslots
in tree-based structures" patches to avoid all the swap() behavior and to provide
better continuity between the aforementioned patches.  Unless something goes
sideways in the last few touchups, I'll get it posted today.
On 03.11.2021 15:47, Sean Christopherson wrote:
> On Wed, Nov 03, 2021, Maciej S. Szmigiero wrote:
>> Capping total n_memslots_pages makes sense to me to avoid the (existing)
>> nr_mmu_pages wraparound issue, will update the next patchset version
>> accordingly.
>
> No need to do it yourself.  I have a reworked version of the series with a bunch
> of cleanups before and after the meat of your series, as well as non-functional
> changes (hopefully) to the "Resolve memslot ID via a hash table" and "Keep memslots
> in tree-based structures" patches to avoid all the swap() behavior and to provide
> better continuity between the aforementioned patches.  Unless something goes
> sideways in the last few touchups, I'll get it posted today.
>

Thanks.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7ec577..315d5368ba84 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1038,6 +1038,7 @@ struct kvm_x86_msr_filter {
 #define APICV_INHIBIT_REASON_X2APIC	5

 struct kvm_arch {
+	u64 n_memslots_pages;
 	unsigned long n_used_mmu_pages;
 	unsigned long n_requested_mmu_pages;
 	unsigned long n_max_mmu_pages;
@@ -1572,7 +1573,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);

 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2d7e61122af8..61b9b7b5c10c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6133,30 +6133,6 @@ int kvm_mmu_module_init(void)
 	return ret;
 }

-/*
- * Calculate mmu pages needed for kvm.
- */
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
-{
-	unsigned long nr_mmu_pages;
-	unsigned long nr_pages = 0;
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int i;
-
-	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		slots = __kvm_memslots(kvm, i);
-
-		kvm_for_each_memslot(memslot, slots)
-			nr_pages += memslot->npages;
-	}
-
-	nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
-	nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
-
-	return nr_mmu_pages;
-}
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 28ef14155726..65fdf27b9423 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11609,9 +11609,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
-	if (!kvm->arch.n_requested_mmu_pages)
-		kvm_mmu_change_mmu_pages(kvm,
-				kvm_mmu_calculate_default_mmu_pages(kvm));
+	if (change == KVM_MR_CREATE)
+		kvm->arch.n_memslots_pages += new->npages;
+	else if (change == KVM_MR_DELETE) {
+		WARN_ON(kvm->arch.n_memslots_pages < old->npages);
+		kvm->arch.n_memslots_pages -= old->npages;
+	}
+
+	if (!kvm->arch.n_requested_mmu_pages) {
+		u64 memslots_pages;
+		unsigned long nr_mmu_pages;
+
+		memslots_pages = kvm->arch.n_memslots_pages * KVM_PERMILLE_MMU_PAGES;
+		do_div(memslots_pages, 1000);
+		nr_mmu_pages = max_t(typeof(nr_mmu_pages),
+				     memslots_pages, KVM_MIN_ALLOC_MMU_PAGES);
+		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+	}

 	kvm_mmu_slot_apply_flags(kvm, old, new, change);