diff mbox series

[v3,2/8] KVM: x86: Clear all supported MPX xfeatures if they are not all set

Message ID 20230224223607.1580880-3-aaronlewis@google.com (mailing list archive)
State New, archived
Headers show
Series Clean up the supported xfeatures | expand

Commit Message

Aaron Lewis Feb. 24, 2023, 10:36 p.m. UTC
Be a good citizen and don't allow any of the supported MPX xfeatures[1]
to be set if they can't all be set.  That way userspace or a guest
doesn't fail if it attempts to set them in XCR0.

[1] CPUID.(EAX=0DH,ECX=0):EAX.BNDREGS[bit-3]
    CPUID.(EAX=0DH,ECX=0):EAX.BNDCSR[bit-4]

Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Aaron Lewis <aaronlewis@google.com>
---
 arch/x86/kvm/cpuid.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

Comments

Mingwei Zhang March 3, 2023, 9:23 p.m. UTC | #1
On Fri, Feb 24, 2023, Aaron Lewis wrote:
> Be a good citizen and don't allow any of the supported MPX xfeatures[1]
> to be set if they can't all be set.  That way userspace or a guest
> doesn't fail if it attempts to set them in XCR0.
> 
> [1] CPUID.(EAX=0DH,ECX=0):EAX.BNDREGS[bit-3]
>     CPUID.(EAX=0DH,ECX=0):EAX.BNDCSR[bit-4]
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> ---
>  arch/x86/kvm/cpuid.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index e1165c196970..b2e7407cd114 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -60,9 +60,22 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
>  	return ret;
>  }
>  
> +static u64 sanitize_xcr0(u64 xcr0)
> +{
> +	u64 mask;
> +
> +	mask = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;
> +	if ((xcr0 & mask) != mask)
> +		xcr0 &= ~mask;
> +
> +	return xcr0;
> +}

Is it better to put sanitize_xcr0() into the previous patch? If we do
that, this one will be just adding purely the MPX related logic and thus
cleaner I think.
> +
>  u64 kvm_permitted_xcr0(void)
>  {
> -	return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> +	u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> +
> +	return sanitize_xcr0(permitted_xcr0);
>  }
>  
>  /*
> -- 
> 2.39.2.637.g21b0678d19-goog
>
Aaron Lewis March 3, 2023, 10:23 p.m. UTC | #2
On Fri, Mar 3, 2023 at 9:23 PM Mingwei Zhang <mizhang@google.com> wrote:
>
> On Fri, Feb 24, 2023, Aaron Lewis wrote:
> > Be a good citizen and don't allow any of the supported MPX xfeatures[1]
> > to be set if they can't all be set.  That way userspace or a guest
> > doesn't fail if it attempts to set them in XCR0.
> >
> > [1] CPUID.(EAX=0DH,ECX=0):EAX.BNDREGS[bit-3]
> >     CPUID.(EAX=0DH,ECX=0):EAX.BNDCSR[bit-4]
> >
> > Suggested-by: Jim Mattson <jmattson@google.com>
> > Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> > ---
> >  arch/x86/kvm/cpuid.c | 15 ++++++++++++++-
> >  1 file changed, 14 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index e1165c196970..b2e7407cd114 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -60,9 +60,22 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
> >       return ret;
> >  }
> >
> > +static u64 sanitize_xcr0(u64 xcr0)
> > +{
> > +     u64 mask;
> > +
> > +     mask = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;
> > +     if ((xcr0 & mask) != mask)
> > +             xcr0 &= ~mask;
> > +
> > +     return xcr0;
> > +}
>
> Is it better to put sanitize_xcr0() into the previous patch? If we do
> that, this one will be just adding purely the MPX related logic and thus
> cleaner I think.

I don't mind doing that.  I considered putting in its own commit
actually.  The only reason I didn't is I wasn't sure it was
appropriate to have a commit that only added an empty function.  If
that's okay I think I'd lean more towards doing it that way.

> > +
> >  u64 kvm_permitted_xcr0(void)
> >  {
> > -     return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> > +     u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> > +
> > +     return sanitize_xcr0(permitted_xcr0);
> >  }
> >
> >  /*
> > --
> > 2.39.2.637.g21b0678d19-goog
> >
Mingwei Zhang March 7, 2023, 4:32 p.m. UTC | #3
On Fri, Mar 3, 2023 at 2:23 PM Aaron Lewis <aaronlewis@google.com> wrote:
>
> On Fri, Mar 3, 2023 at 9:23 PM Mingwei Zhang <mizhang@google.com> wrote:
> >
> > On Fri, Feb 24, 2023, Aaron Lewis wrote:
> > > Be a good citizen and don't allow any of the supported MPX xfeatures[1]
> > > to be set if they can't all be set.  That way userspace or a guest
> > > doesn't fail if it attempts to set them in XCR0.
> > >
> > > [1] CPUID.(EAX=0DH,ECX=0):EAX.BNDREGS[bit-3]
> > >     CPUID.(EAX=0DH,ECX=0):EAX.BNDCSR[bit-4]
> > >
> > > Suggested-by: Jim Mattson <jmattson@google.com>
> > > Signed-off-by: Aaron Lewis <aaronlewis@google.com>

Reviewed-by: Mingwei Zhang <mizhang@google.com>

> > > ---
> > >  arch/x86/kvm/cpuid.c | 15 ++++++++++++++-
> > >  1 file changed, 14 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > > index e1165c196970..b2e7407cd114 100644
> > > --- a/arch/x86/kvm/cpuid.c
> > > +++ b/arch/x86/kvm/cpuid.c
> > > @@ -60,9 +60,22 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
> > >       return ret;
> > >  }
> > >
> > > +static u64 sanitize_xcr0(u64 xcr0)
> > > +{
> > > +     u64 mask;
> > > +
> > > +     mask = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;
> > > +     if ((xcr0 & mask) != mask)
> > > +             xcr0 &= ~mask;
> > > +
> > > +     return xcr0;
> > > +}
> >
> > Is it better to put sanitize_xcr0() into the previous patch? If we do
> > that, this one will be just adding purely the MPX related logic and thus
> > cleaner I think.
>
> I don't mind doing that.  I considered putting in its own commit
> actually.  The only reason I didn't is I wasn't sure it was
> appropriate to have a commit that only added an empty function.  If
> that's okay I think I'd lean more towards doing it that way.
>

Yeah, either way works for me.
Sean Christopherson March 23, 2023, 8:29 p.m. UTC | #4
On Fri, Feb 24, 2023, Aaron Lewis wrote:
> Be a good citizen and don't allow any of the supported MPX xfeatures[1]
> to be set if they can't all be set.  That way userspace or a guest
> doesn't fail if it attempts to set them in XCR0.
> 
> [1] CPUID.(EAX=0DH,ECX=0):EAX.BNDREGS[bit-3]
>     CPUID.(EAX=0DH,ECX=0):EAX.BNDCSR[bit-4]
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Signed-off-by: Aaron Lewis <aaronlewis@google.com>
> ---
>  arch/x86/kvm/cpuid.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index e1165c196970..b2e7407cd114 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -60,9 +60,22 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
>  	return ret;
>  }
>  
> +static u64 sanitize_xcr0(u64 xcr0)
> +{
> +	u64 mask;
> +
> +	mask = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;
> +	if ((xcr0 & mask) != mask)
> +		xcr0 &= ~mask;
> +
> +	return xcr0;
> +}
> +
>  u64 kvm_permitted_xcr0(void)
>  {
> -	return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> +	u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> +
> +	return sanitize_xcr0(permitted_xcr0);

Sanitizing XCR0 in this "permitted" helper conflates two separate things (sanity
vs. permissions) and overall leads to a really, really confusing mess.  E.g.
kvm_vcpu_ioctl_x86_set_xsave(), cpuid_get_supported_xcr0(), kvm_x86_dev_get_attr(),
and kvm_mpx_supported() all use the non-sanitized value, which appears nonsensical,
but is actually correct because the whole "permitted" thing is funky.

On a related topic, isn't cpuid_get_supported_xcr0() buggy?  Ah, no, the wonderfully
named kvm_check_cpuid() ensures fpu_enable_guest_xfd_features() is invoked before
the result can actually be consumed.

Ugh, and past me created a royal mess with SGX subleaf 0x12.1.  KVM shouldn't
be manipulating guest CPUID just to ensure userspace doesn't bypass the guest's
permitted XCR0 by launching an enclave.  I.e. the SGX code that consumes
cpuid_get_supported_xcr0() in __kvm_update_cpuid_runtime() should not exists.
I'm 99.9% certain we can simply nuke that code without breaking userspace, e.g.
QEMU correctly sets the data.

LOL, or at least it used to.  QEMU commit 301e90675c ("target/i386: Enable support
for XSAVES based features") completely broke SGX by using allowed XSS instead of
XCR0.

Anyways, back to this mess.  I was going to say "As I suggested in v2[0], sanitize
kvm_caps.supported_xcr0 itself", but after a _lot_ of starting, I realized (or
finally remembered) that that doesn't work because supported_xcr0 is already sane,
the issue is specifically with xstate_get_guest_group_perm() clearing XTILE_DATA
and leaving behind the XTILE_CFG landmine.

And for all intents and purposes, supported_xcr0 _must_ be sane since it comes
from host_xcr0, which is pulled straight from hardware.  So barring a _completely_
busted CPU or hypervisor _and_ a busted kernel, supported_xcr0 itself can never
be insane.  So not only is my proposal to sanitize supported_xcr0 not viable, the
entire idea of generically sanitizing XCR0 is bunk.  The _only_ issue is XTILE_CFG,
and it's very specifically because of the dynamic feature crud.

So rather than add a bunch of logic to sanitize XCR0, which is at best superfluous
and at worst misleading (again, only XTILE_CFG can ever be insane), I think we
should just special case XTILE_CFG and add a big fat comment explaining why KVM
further manipulates the "supported" XCR0.

Ah, and that's more or less what Aaron had in v1, but then Jim asked about MPX[1]
and things went off the rails (I'm just relieved that I wasn't the one to send
you awry and then complain about the result).

Aaron and/or Mingwei, can you give the attached patches a spin?  Patch 1 is a
slightly reworked version of Aaron's patch 1, and patch 2 implements what I just
described (guts of patch 2 also pasted below).  If things look good, I'll post a v4
of this series.

[0] https://lore.kernel.org/all/Y7R36wsXn3JqwfEv@google.com
[1] https://lore.kernel.org/all/CALMp9eQD8EpS50A0iAxsoaW-_yFmWERWw6rbAh9VSEJjDrMkNQ@mail.gmail.com


---
 arch/x86/kvm/x86.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b6c6988d99b5..ae235bc2b9bc 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -3,6 +3,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include <asm/fpu/xstate.h>
 #include <asm/mce.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
@@ -325,7 +326,22 @@ extern bool enable_pmu;
  */
 static inline u64 kvm_get_filtered_xcr0(void)
 {
-	return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
+	u64 supported_xcr0 = kvm_caps.supported_xcr0;
+
+	BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
+
+	if (supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
+		supported_xcr0 &= xstate_get_guest_group_perm();
+
+		/*
+		 * Treat XTILE_CFG as unsupported if the current process isn't
+		 * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
+		 * XCR0 without setting XTILE_DATA is architecturally illegal.
+		 */
+		if (!(supported_xcr0 & XFEATURE_MASK_XTILE_DATA))
+			supported_xcr0 &= XFEATURE_MASK_XTILE_CFG;
+	}
+	return supported_xcr0;
 }
 
 static inline bool kvm_mpx_supported(void)
--
Mingwei Zhang March 23, 2023, 9:10 p.m. UTC | #5
>
> Aaron and/or Mingwei, can you give the attached patches a spin?  Patch 1 is a
> slightly reworked version of Aaron's patch 1, and patch 2 implements what I just
> described (guts of patch 2 also pasted below).  If things look good, I'll post a v4
> of this series.

Overall I don't have a strong opinion. Extending the sanitization from
AMX to AVX512 and MPX does seem to slightly change the purpose from
permission adjustment to hw feature sanitization. So, it is ok for me
to see the push for AVX512 and MPX as a different motivation, thus
peeling them off from this series.

>
> [0] https://lore.kernel.org/all/Y7R36wsXn3JqwfEv@google.com
> [1] https://lore.kernel.org/all/CALMp9eQD8EpS50A0iAxsoaW-_yFmWERWw6rbAh9VSEJjDrMkNQ@mail.gmail.com
>
I can take a look but in general [1] the whole series should fix the
problem. Let me check the following code.

>
> ---
>  arch/x86/kvm/x86.h | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index b6c6988d99b5..ae235bc2b9bc 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -3,6 +3,7 @@
>  #define ARCH_X86_KVM_X86_H
>
>  #include <linux/kvm_host.h>
> +#include <asm/fpu/xstate.h>
>  #include <asm/mce.h>
>  #include <asm/pvclock.h>
>  #include "kvm_cache_regs.h"
> @@ -325,7 +326,22 @@ extern bool enable_pmu;
>   */
>  static inline u64 kvm_get_filtered_xcr0(void)
>  {
> -       return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> +       u64 supported_xcr0 = kvm_caps.supported_xcr0;
> +
> +       BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
> +
> +       if (supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
> +               supported_xcr0 &= xstate_get_guest_group_perm();
> +
> +               /*
> +                * Treat XTILE_CFG as unsupported if the current process isn't
> +                * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
> +                * XCR0 without setting XTILE_DATA is architecturally illegal.
> +                */
> +               if (!(supported_xcr0 & XFEATURE_MASK_XTILE_DATA))
> +                       supported_xcr0 &= XFEATURE_MASK_XTILE_CFG;

should be this? supported_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;


> +       }
> +       return supported_xcr0;
>  }
>
>  static inline bool kvm_mpx_supported(void)
> --
>
Mingwei Zhang March 23, 2023, 9:16 p.m. UTC | #6
> >  static inline u64 kvm_get_filtered_xcr0(void)
> >  {
> > -       return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> > +       u64 supported_xcr0 = kvm_caps.supported_xcr0;
> > +
> > +       BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
> > +
> > +       if (supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
> > +               supported_xcr0 &= xstate_get_guest_group_perm();
> > +
> > +               /*
> > +                * Treat XTILE_CFG as unsupported if the current process isn't
> > +                * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
> > +                * XCR0 without setting XTILE_DATA is architecturally illegal.
> > +                */
> > +               if (!(supported_xcr0 & XFEATURE_MASK_XTILE_DATA))
> > +                       supported_xcr0 &= XFEATURE_MASK_XTILE_CFG;
>
> should be this? supported_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;
>
>
> > +       }
> > +       return supported_xcr0;
> >  }
Also, a minor opinion: shall we use permitted_xcr0 instead of
supported_xcr0 to be consistent with other places? This way,  it is
clear that supported_xcr0 is (almost) never changing.  permitted_xcr0,
as its name suggested, will be subject to permission change.
Sean Christopherson March 23, 2023, 9:29 p.m. UTC | #7
On Thu, Mar 23, 2023, Mingwei Zhang wrote:
> > --- a/arch/x86/kvm/x86.h
> > +++ b/arch/x86/kvm/x86.h
> > @@ -3,6 +3,7 @@
> >  #define ARCH_X86_KVM_X86_H
> >
> >  #include <linux/kvm_host.h>
> > +#include <asm/fpu/xstate.h>
> >  #include <asm/mce.h>
> >  #include <asm/pvclock.h>
> >  #include "kvm_cache_regs.h"
> > @@ -325,7 +326,22 @@ extern bool enable_pmu;
> >   */
> >  static inline u64 kvm_get_filtered_xcr0(void)
> >  {
> > -       return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> > +       u64 supported_xcr0 = kvm_caps.supported_xcr0;
> > +
> > +       BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
> > +
> > +       if (supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
> > +               supported_xcr0 &= xstate_get_guest_group_perm();
> > +
> > +               /*
> > +                * Treat XTILE_CFG as unsupported if the current process isn't
> > +                * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
> > +                * XCR0 without setting XTILE_DATA is architecturally illegal.
> > +                */
> > +               if (!(supported_xcr0 & XFEATURE_MASK_XTILE_DATA))
> > +                       supported_xcr0 &= XFEATURE_MASK_XTILE_CFG;
> 
> should be this? supported_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;

Doh, yes.
Sean Christopherson March 23, 2023, 9:30 p.m. UTC | #8
On Thu, Mar 23, 2023, Mingwei Zhang wrote:
> > >  static inline u64 kvm_get_filtered_xcr0(void)
> > >  {
> > > -       return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
> > > +       u64 supported_xcr0 = kvm_caps.supported_xcr0;
> > > +
> > > +       BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
> > > +
> > > +       if (supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
> > > +               supported_xcr0 &= xstate_get_guest_group_perm();
> > > +
> > > +               /*
> > > +                * Treat XTILE_CFG as unsupported if the current process isn't
> > > +                * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
> > > +                * XCR0 without setting XTILE_DATA is architecturally illegal.
> > > +                */
> > > +               if (!(supported_xcr0 & XFEATURE_MASK_XTILE_DATA))
> > > +                       supported_xcr0 &= XFEATURE_MASK_XTILE_CFG;
> >
> > should be this? supported_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;
> >
> >
> > > +       }
> > > +       return supported_xcr0;
> > >  }
> Also, a minor opinion: shall we use permitted_xcr0 instead of
> supported_xcr0 to be consistent with other places? This way,  it is
> clear that supported_xcr0 is (almost) never changing.  permitted_xcr0,
> as its name suggested, will be subject to permission change.

Ya, works for me.
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e1165c196970..b2e7407cd114 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -60,9 +60,22 @@  u32 xstate_required_size(u64 xstate_bv, bool compacted)
 	return ret;
 }
 
+static u64 sanitize_xcr0(u64 xcr0)
+{
+	u64 mask;
+
+	mask = XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR;
+	if ((xcr0 & mask) != mask)
+		xcr0 &= ~mask;
+
+	return xcr0;
+}
+
 u64 kvm_permitted_xcr0(void)
 {
-	return kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
+	u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
+
+	return sanitize_xcr0(permitted_xcr0);
 }
 
 /*