[05/21] KVM: x86/mmu: Try to avoid crashing KVM if a MMU memory cache is empty

Message ID: 20200605213853.14959-6-sean.j.christopherson@intel.com (mailing list archive)
State: New, archived
Series: KVM: Cleanup and unify kvm_mmu_memory_cache usage

Commit Message

Sean Christopherson June 5, 2020, 9:38 p.m. UTC
Attempt to allocate a new object instead of crashing KVM (and likely the
kernel) if a memory cache is unexpectedly empty.  Use GFP_ATOMIC for the
allocation as the caches are used while holding mmu_lock.  The immediate
BUG_ON() makes the code unnecessarily explosive and led to confusing
minimums being used in the past, e.g. allocating 4 objects where 1 would
suffice.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/mmu/mmu.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

Comments

Ben Gardon June 10, 2020, 10:12 p.m. UTC | #1
On Fri, Jun 5, 2020 at 2:39 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> Attempt to allocate a new object instead of crashing KVM (and likely the
> kernel) if a memory cache is unexpectedly empty.  Use GFP_ATOMIC for the
> allocation as the caches are used while holding mmu_lock.  The immediate
> BUG_ON() makes the code unnecessarily explosive and led to confusing
> minimums being used in the past, e.g. allocating 4 objects where 1 would
> suffice.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/mmu/mmu.c | 21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index ba70de24a5b0..5e773564ab20 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1060,6 +1060,15 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
>         local_irq_enable();
>  }
>
> +static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> +                                              gfp_t gfp_flags)
> +{
> +       if (mc->kmem_cache)
> +               return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
> +       else
> +               return (void *)__get_free_page(gfp_flags);
> +}
> +
>  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
>  {
>         void *obj;
> @@ -1067,10 +1076,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
>         if (mc->nobjs >= min)
>                 return 0;
>         while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
> -               if (mc->kmem_cache)
> -                       obj = kmem_cache_zalloc(mc->kmem_cache, GFP_KERNEL_ACCOUNT);
> -               else
> -                       obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
> +               obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
>                 if (!obj)
>                         return mc->nobjs >= min ? 0 : -ENOMEM;
>                 mc->objects[mc->nobjs++] = obj;
> @@ -1118,8 +1124,11 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
>  {
>         void *p;
>
> -       BUG_ON(!mc->nobjs);
> -       p = mc->objects[--mc->nobjs];
> +       if (WARN_ON(!mc->nobjs))
> +               p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
Is an atomic allocation really necessary here? In most cases, when
topping up the memory cache we are handling a guest page fault. This
BUG_ON() could also be removed by returning NULL if unable to allocate
from the cache, and then re-trying the page fault in that case. I don't
know if this is necessary to handle other, non-x86 architectures more
easily, but I worry this could cause some unpleasantness if combined
with some other bug, or if the host was in a low-memory situation and
this consumed the atomic pool. Perhaps this is a moot point, since we
log a warning and consider the atomic allocation something of an
error.
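Something like the following is what I have in mind (a hypothetical
sketch, not working code: mmu_memory_cache_try_alloc() does not exist,
and the retry plumbing through the fault handler is elided):

static void *mmu_memory_cache_try_alloc(struct kvm_mmu_memory_cache *mc)
{
	/* Empty cache: report failure instead of BUG() or GFP_ATOMIC. */
	if (!mc->nobjs)
		return NULL;
	return mc->objects[--mc->nobjs];
}

	/* ... and in the fault handler, under mmu_lock: */
	desc = mmu_memory_cache_try_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
	if (!desc)
		return RET_PF_RETRY;	/* unwind, re-enter the guest, refault */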
> +       else
> +               p = mc->objects[--mc->nobjs];
> +       BUG_ON(!p);
>         return p;
>  }
>
> --
> 2.26.0
>
Sean Christopherson June 17, 2020, 12:53 a.m. UTC | #2
On Wed, Jun 10, 2020 at 03:12:04PM -0700, Ben Gardon wrote:
> On Fri, Jun 5, 2020 at 2:39 PM Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > Attempt to allocate a new object instead of crashing KVM (and likely the
> > kernel) if a memory cache is unexpectedly empty.  Use GFP_ATOMIC for the
> > allocation as the caches are used while holding mmu_lock.  The immediate
> > BUG_ON() makes the code unnecessarily explosive and led to confusing
> > minimums being used in the past, e.g. allocating 4 objects where 1 would
> > suffice.
> >
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > ---
> >  arch/x86/kvm/mmu/mmu.c | 21 +++++++++++++++------
> >  1 file changed, 15 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index ba70de24a5b0..5e773564ab20 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -1060,6 +1060,15 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
> >         local_irq_enable();
> >  }
> >
> > +static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> > +                                              gfp_t gfp_flags)
> > +{
> > +       if (mc->kmem_cache)
> > +               return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
> > +       else
> > +               return (void *)__get_free_page(gfp_flags);
> > +}
> > +
> >  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
> >  {
> >         void *obj;
> > @@ -1067,10 +1076,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
> >         if (mc->nobjs >= min)
> >                 return 0;
> >         while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
> > -               if (mc->kmem_cache)
> > -                       obj = kmem_cache_zalloc(mc->kmem_cache, GFP_KERNEL_ACCOUNT);
> > -               else
> > -                       obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
> > +               obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
> >                 if (!obj)
> >                         return mc->nobjs >= min ? 0 : -ENOMEM;
> >                 mc->objects[mc->nobjs++] = obj;
> > @@ -1118,8 +1124,11 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
> >  {
> >         void *p;
> >
> > -       BUG_ON(!mc->nobjs);
> > -       p = mc->objects[--mc->nobjs];
> > +       if (WARN_ON(!mc->nobjs))
> > +               p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
> Is an atomic allocation really necessary here? In most cases, when
> topping up the memory cache we are handling a guest page fault. This
> BUG_ON() could also be removed by returning NULL if unable to allocate
> from the cache, and then re-trying the page fault in that case.

The whole point of these caches is to avoid having to deal with allocation
errors in the low level MMU paths, e.g. propagating an error up from
pte_list_add() would be a mess.
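
Roughly, the contract is (a condensed sketch; mmu_topup_memory_caches()
is the real mmu.c helper, the mapping work itself is elided):

static int page_fault_sketch(struct kvm_vcpu *vcpu)
{
	int r;

	/*
	 * All allocations happen up front, in a sleepable context where
	 * an error is trivial to propagate to the caller.
	 */
	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	spin_lock(&vcpu->kvm->mmu_lock);
	/*
	 * Deep in the locked section, helpers such as pte_list_add()
	 * pop pre-filled objects via mmu_memory_cache_alloc(); that pop
	 * is not allowed to fail, so no error unwinding needs to be
	 * threaded through the shadow paging code.
	 */
	spin_unlock(&vcpu->kvm->mmu_lock);
	return 0;
}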

> I don't know if this is necessary to handle other, non-x86 architectures more
> easily, but I worry this could cause some unpleasantness if combined with
> some other bug, or if the host was in a low-memory situation and this
> consumed the atomic pool. Perhaps this is a moot point, since we log a
> warning and consider the atomic allocation something of an error.

Yeah, it's the latter.  If we reach this point it's a guaranteed KVM bug.
Because it's likely that the caller holds a lock, triggering the BUG_ON()
has a high chance of hanging the system.  The idea is to take the path that
_may_ crash the kernel instead of killing the VM and more than likely
crashing the kernel.  And hopefully this code will never be exercised on a
production kernel.

> > +       else
> > +               p = mc->objects[--mc->nobjs];
> > +       BUG_ON(!p);
> >         return p;
> >  }
> >
> > --
> > 2.26.0
> >
Ben Gardon June 17, 2020, 4:36 p.m. UTC | #3
On Tue, Jun 16, 2020 at 5:53 PM Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Wed, Jun 10, 2020 at 03:12:04PM -0700, Ben Gardon wrote:
> > On Fri, Jun 5, 2020 at 2:39 PM Sean Christopherson
> > <sean.j.christopherson@intel.com> wrote:
> > >
> > > Attempt to allocate a new object instead of crashing KVM (and likely the
> > > kernel) if a memory cache is unexpectedly empty.  Use GFP_ATOMIC for the
> > > allocation as the caches are used while holding mmu_lock.  The immediate
> > > BUG_ON() makes the code unnecessarily explosive and led to confusing
> > > minimums being used in the past, e.g. allocating 4 objects where 1 would
> > > suffice.
> > >
> > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
> > > ---
> > >  arch/x86/kvm/mmu/mmu.c | 21 +++++++++++++++------
> > >  1 file changed, 15 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > > index ba70de24a5b0..5e773564ab20 100644
> > > --- a/arch/x86/kvm/mmu/mmu.c
> > > +++ b/arch/x86/kvm/mmu/mmu.c
> > > @@ -1060,6 +1060,15 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
> > >         local_irq_enable();
> > >  }
> > >
> > > +static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
> > > +                                              gfp_t gfp_flags)
> > > +{
> > > +       if (mc->kmem_cache)
> > > +               return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
> > > +       else
> > > +               return (void *)__get_free_page(gfp_flags);
> > > +}
> > > +
> > >  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
> > >  {
> > >         void *obj;
> > > @@ -1067,10 +1076,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
> > >         if (mc->nobjs >= min)
> > >                 return 0;
> > >         while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
> > > -               if (mc->kmem_cache)
> > > -                       obj = kmem_cache_zalloc(mc->kmem_cache, GFP_KERNEL_ACCOUNT);
> > > -               else
> > > -                       obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
> > > +               obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
> > >                 if (!obj)
> > >                         return mc->nobjs >= min ? 0 : -ENOMEM;
> > >                 mc->objects[mc->nobjs++] = obj;
> > > @@ -1118,8 +1124,11 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
> > >  {
> > >         void *p;
> > >
> > > -       BUG_ON(!mc->nobjs);
> > > -       p = mc->objects[--mc->nobjs];
> > > +       if (WARN_ON(!mc->nobjs))
> > > +               p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
> > Is an atomic allocation really necessary here? In most cases, when
> > topping up the memory cache we are handling a guest page fault. This
> > BUG_ON() could also be removed by returning NULL if unable to allocate
> > from the cache, and then re-trying the page fault in that case.
>
> The whole point of these caches is to avoid having to deal with allocation
> errors in the low level MMU paths, e.g. propagating an error up from
> pte_list_add() would be a mess.
>
> > I don't know if this is necessary to handle other, non-x86 architectures more
> > easily, but I worry this could cause some unpleasantness if combined with
> > some other bug, or if the host was in a low-memory situation and this
> > consumed the atomic pool. Perhaps this is a moot point, since we log a
> > warning and consider the atomic allocation something of an error.
>
> Yeah, it's the latter.  If we reach this point it's a guaranteed KVM bug.
> Because it's likely that the caller holds a lock, triggering the BUG_ON()
> has a high chance of hanging the system.  The idea is to take the path that
> _may_ crash the kernel instead of killing the VM and more than likely
> crashing the kernel.  And hopefully this code will never be exercised on a
> production kernel.

That makes sense to me. I agree it's definitely positive to replace a
BUG_ON with something else.

>
> > > +       else
> > > +               p = mc->objects[--mc->nobjs];
> > > +       BUG_ON(!p);
> > >         return p;
> > >  }
> > >
> > > --
> > > 2.26.0
> > >

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ba70de24a5b0..5e773564ab20 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1060,6 +1060,15 @@  static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 }
 
+static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
+					       gfp_t gfp_flags)
+{
+	if (mc->kmem_cache)
+		return kmem_cache_zalloc(mc->kmem_cache, gfp_flags);
+	else
+		return (void *)__get_free_page(gfp_flags);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 {
 	void *obj;
@@ -1067,10 +1076,7 @@  static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
 	if (mc->nobjs >= min)
 		return 0;
 	while (mc->nobjs < ARRAY_SIZE(mc->objects)) {
-		if (mc->kmem_cache)
-			obj = kmem_cache_zalloc(mc->kmem_cache, GFP_KERNEL_ACCOUNT);
-		else
-			obj = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+		obj = mmu_memory_cache_alloc_obj(mc, GFP_KERNEL_ACCOUNT);
 		if (!obj)
 			return mc->nobjs >= min ? 0 : -ENOMEM;
 		mc->objects[mc->nobjs++] = obj;
@@ -1118,8 +1124,11 @@  static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 {
 	void *p;
 
-	BUG_ON(!mc->nobjs);
-	p = mc->objects[--mc->nobjs];
+	if (WARN_ON(!mc->nobjs))
+		p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
+	else
+		p = mc->objects[--mc->nobjs];
+	BUG_ON(!p);
 	return p;
 }