diff mbox series

[v3,5/8] x86/pv: allow reading FEATURE_CONTROL MSR

Message ID 20200901105445.22277-6-roger.pau@citrix.com (mailing list archive)
State Superseded
Headers show
Series x86: switch default MSR behavior | expand

Commit Message

Roger Pau Monné Sept. 1, 2020, 10:54 a.m. UTC
Linux PV guests will attempt to read the FEATURE_CONTROL MSR, so move
the handling done in VMX code into guest_rdmsr as it can be shared
between PV and HVM guests that way.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes from v1:
 - Move the VMX implementation into guest_rdmsr.
---
 xen/arch/x86/hvm/vmx/vmx.c |  8 +-------
 xen/arch/x86/msr.c         | 13 +++++++++++++
 2 files changed, 14 insertions(+), 7 deletions(-)

Comments

Andrew Cooper Sept. 2, 2020, 8:56 p.m. UTC | #1
On 01/09/2020 11:54, Roger Pau Monne wrote:
> Linux PV guests will attempt to read the FEATURE_CONTROL MSR, so move
> the handling done in VMX code into guest_rdmsr as it can be shared
> between PV and HVM guests that way.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> Changes from v1:
>  - Move the VMX implementation into guest_rdmsr.
> ---
>  xen/arch/x86/hvm/vmx/vmx.c |  8 +-------
>  xen/arch/x86/msr.c         | 13 +++++++++++++
>  2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 4717e50d4a..f6657af923 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2980,13 +2980,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>      case MSR_IA32_DEBUGCTLMSR:
>          __vmread(GUEST_IA32_DEBUGCTL, msr_content);
>          break;
> -    case MSR_IA32_FEATURE_CONTROL:
> -        *msr_content = IA32_FEATURE_CONTROL_LOCK;
> -        if ( vmce_has_lmce(curr) )
> -            *msr_content |= IA32_FEATURE_CONTROL_LMCE_ON;
> -        if ( nestedhvm_enabled(curr->domain) )
> -            *msr_content |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
> -        break;
> +
>      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
>          if ( !nvmx_msr_read_intercept(msr, msr_content) )
>              goto gp_fault;
> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
> index e84107ac7b..cc2f111a90 100644
> --- a/xen/arch/x86/msr.c
> +++ b/xen/arch/x86/msr.c
> @@ -25,6 +25,7 @@
>  #include <xen/sched.h>
>  
>  #include <asm/debugreg.h>
> +#include <asm/hvm/nestedhvm.h>
>  #include <asm/hvm/viridian.h>
>  #include <asm/msr.h>
>  #include <asm/setup.h>
> @@ -197,6 +198,18 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>          /* Not offered to guests. */
>          goto gp_fault;
>  
> +    case MSR_IA32_FEATURE_CONTROL:
> +        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) )
> +            goto gp_fault;

The MSR is available if:

"If any one enumeration
condition for defined bit
field position greater than
bit 0 holds."

which for us means cp->basic.vmx || cp->feat.lmce at the moment, with
perhaps some smx/sgx in the future.

In particular, this MSR is available on Centaur and Shanghai, which
implement VT-x.

~Andrew

> +
> +        *val = IA32_FEATURE_CONTROL_LOCK;
> +        if ( vmce_has_lmce(v) )
> +            *val |= IA32_FEATURE_CONTROL_LMCE_ON;
> +        if ( nestedhvm_enabled(d) )
> +            *val |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
> +        break;
> +
> +
>      case MSR_IA32_PLATFORM_ID:
>          if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ||
>               !(boot_cpu_data.x86_vendor & X86_VENDOR_INTEL) )
Roger Pau Monné Sept. 3, 2020, 1:33 p.m. UTC | #2
On Wed, Sep 02, 2020 at 09:56:48PM +0100, Andrew Cooper wrote:
> On 01/09/2020 11:54, Roger Pau Monne wrote:
> > Linux PV guests will attempt to read the FEATURE_CONTROL MSR, so move
> > the handling done in VMX code into guest_rdmsr as it can be shared
> > between PV and HVM guests that way.
> >
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> > Changes from v1:
> >  - Move the VMX implementation into guest_rdmsr.
> > ---
> >  xen/arch/x86/hvm/vmx/vmx.c |  8 +-------
> >  xen/arch/x86/msr.c         | 13 +++++++++++++
> >  2 files changed, 14 insertions(+), 7 deletions(-)
> >
> > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> > index 4717e50d4a..f6657af923 100644
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -2980,13 +2980,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
> >      case MSR_IA32_DEBUGCTLMSR:
> >          __vmread(GUEST_IA32_DEBUGCTL, msr_content);
> >          break;
> > -    case MSR_IA32_FEATURE_CONTROL:
> > -        *msr_content = IA32_FEATURE_CONTROL_LOCK;
> > -        if ( vmce_has_lmce(curr) )
> > -            *msr_content |= IA32_FEATURE_CONTROL_LMCE_ON;
> > -        if ( nestedhvm_enabled(curr->domain) )
> > -            *msr_content |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
> > -        break;
> > +
> >      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
> >          if ( !nvmx_msr_read_intercept(msr, msr_content) )
> >              goto gp_fault;
> > diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
> > index e84107ac7b..cc2f111a90 100644
> > --- a/xen/arch/x86/msr.c
> > +++ b/xen/arch/x86/msr.c
> > @@ -25,6 +25,7 @@
> >  #include <xen/sched.h>
> >  
> >  #include <asm/debugreg.h>
> > +#include <asm/hvm/nestedhvm.h>
> >  #include <asm/hvm/viridian.h>
> >  #include <asm/msr.h>
> >  #include <asm/setup.h>
> > @@ -197,6 +198,18 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
> >          /* Not offered to guests. */
> >          goto gp_fault;
> >  
> > +    case MSR_IA32_FEATURE_CONTROL:
> > +        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) )
> > +            goto gp_fault;
> 
> The MSR is available if:
> 
> "If any one enumeration
> condition for defined bit
> field position greater than
> bit 0 holds."
> 
> which for us means cp->basic.vmx || cp->feat.lmce at the moment, with
> perhaps some smx/sgx in the future.

I don't think there's a lmce cpu bit (seems to be signaled in
IA32_MCG_CAP[27] = 1), maybe I should just use vmce_has_lmce?

if ( !cp->basic.vmx || !vmce_has_lmce(v) )
    goto gp_fault;

Is it fine however to return a #GP if we don't provide any of those
features to guests, but the underlying CPU does support them?

That seems to be slightly different from how we currently handle reads
to FEATURE_CONTROL, since it will never fault on Intel (or compliant),
even if we just return with bit 0 set alone. The new approach seems
closer to what real hardware would do.

Thanks, Roger.
Andrew Cooper Sept. 3, 2020, 2:06 p.m. UTC | #3
On 03/09/2020 14:33, Roger Pau Monné wrote:
> On Wed, Sep 02, 2020 at 09:56:48PM +0100, Andrew Cooper wrote:
>> On 01/09/2020 11:54, Roger Pau Monne wrote:
>>> Linux PV guests will attempt to read the FEATURE_CONTROL MSR, so move
>>> the handling done in VMX code into guest_rdmsr as it can be shared
>>> between PV and HVM guests that way.
>>>
>>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>>> ---
>>> Changes from v1:
>>>  - Move the VMX implementation into guest_rdmsr.
>>> ---
>>>  xen/arch/x86/hvm/vmx/vmx.c |  8 +-------
>>>  xen/arch/x86/msr.c         | 13 +++++++++++++
>>>  2 files changed, 14 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
>>> index 4717e50d4a..f6657af923 100644
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -2980,13 +2980,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
>>>      case MSR_IA32_DEBUGCTLMSR:
>>>          __vmread(GUEST_IA32_DEBUGCTL, msr_content);
>>>          break;
>>> -    case MSR_IA32_FEATURE_CONTROL:
>>> -        *msr_content = IA32_FEATURE_CONTROL_LOCK;
>>> -        if ( vmce_has_lmce(curr) )
>>> -            *msr_content |= IA32_FEATURE_CONTROL_LMCE_ON;
>>> -        if ( nestedhvm_enabled(curr->domain) )
>>> -            *msr_content |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
>>> -        break;
>>> +
>>>      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
>>>          if ( !nvmx_msr_read_intercept(msr, msr_content) )
>>>              goto gp_fault;
>>> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
>>> index e84107ac7b..cc2f111a90 100644
>>> --- a/xen/arch/x86/msr.c
>>> +++ b/xen/arch/x86/msr.c
>>> @@ -25,6 +25,7 @@
>>>  #include <xen/sched.h>
>>>  
>>>  #include <asm/debugreg.h>
>>> +#include <asm/hvm/nestedhvm.h>
>>>  #include <asm/hvm/viridian.h>
>>>  #include <asm/msr.h>
>>>  #include <asm/setup.h>
>>> @@ -197,6 +198,18 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
>>>          /* Not offered to guests. */
>>>          goto gp_fault;
>>>  
>>> +    case MSR_IA32_FEATURE_CONTROL:
>>> +        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) )
>>> +            goto gp_fault;
>> The MSR is available if:
>>
>> "If any one enumeration
>> condition for defined bit
>> field position greater than
>> bit 0 holds."
>>
>> which for us means cp->basic.vmx || cp->feat.lmce at the moment, with
>> perhaps some smx/sgx in the future.
> I don't think there's a lmce cpu bit (seems to be signaled in
> IA32_MCG_CAP[27] = 1), maybe I should just use vmce_has_lmce?
>
> if ( !cp->basic.vmx || !vmce_has_lmce(v) )
>     goto gp_fault;

Ah yes, sorry.

Eventually that will be mp->mcg_cap.lmce, but use the predicate for now.

> Is it fine however to return a #GP if we don't provide any of those
> features to guests, but the underlying CPU does support them?

Yes.  That is literally how the availability of the MSR specified by Intel.

Any code which doesn't follow those rules will find #GP even on some
recent CPUs.

> That seems to be slightly different from how we currently handle reads
> to FEATURE_CONTROL, since it will never fault on Intel (or compliant),
> even if we just return with bit 0 set alone. The new approach seems
> closer to what real hardware would do.

Don't fall into the trap of assuming that the current MSR behaviour is
perfect, and something we wish to preserve. :)

~Andrew
Roger Pau Monné Sept. 3, 2020, 2:10 p.m. UTC | #4
On Thu, Sep 03, 2020 at 03:06:38PM +0100, Andrew Cooper wrote:
> On 03/09/2020 14:33, Roger Pau Monné wrote:
> > On Wed, Sep 02, 2020 at 09:56:48PM +0100, Andrew Cooper wrote:
> >> On 01/09/2020 11:54, Roger Pau Monne wrote:
> >>> Linux PV guests will attempt to read the FEATURE_CONTROL MSR, so move
> >>> the handling done in VMX code into guest_rdmsr as it can be shared
> >>> between PV and HVM guests that way.
> >>>
> >>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> >>> ---
> >>> Changes from v1:
> >>>  - Move the VMX implementation into guest_rdmsr.
> >>> ---
> >>>  xen/arch/x86/hvm/vmx/vmx.c |  8 +-------
> >>>  xen/arch/x86/msr.c         | 13 +++++++++++++
> >>>  2 files changed, 14 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> >>> index 4717e50d4a..f6657af923 100644
> >>> --- a/xen/arch/x86/hvm/vmx/vmx.c
> >>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> >>> @@ -2980,13 +2980,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
> >>>      case MSR_IA32_DEBUGCTLMSR:
> >>>          __vmread(GUEST_IA32_DEBUGCTL, msr_content);
> >>>          break;
> >>> -    case MSR_IA32_FEATURE_CONTROL:
> >>> -        *msr_content = IA32_FEATURE_CONTROL_LOCK;
> >>> -        if ( vmce_has_lmce(curr) )
> >>> -            *msr_content |= IA32_FEATURE_CONTROL_LMCE_ON;
> >>> -        if ( nestedhvm_enabled(curr->domain) )
> >>> -            *msr_content |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
> >>> -        break;
> >>> +
> >>>      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
> >>>          if ( !nvmx_msr_read_intercept(msr, msr_content) )
> >>>              goto gp_fault;
> >>> diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
> >>> index e84107ac7b..cc2f111a90 100644
> >>> --- a/xen/arch/x86/msr.c
> >>> +++ b/xen/arch/x86/msr.c
> >>> @@ -25,6 +25,7 @@
> >>>  #include <xen/sched.h>
> >>>  
> >>>  #include <asm/debugreg.h>
> >>> +#include <asm/hvm/nestedhvm.h>
> >>>  #include <asm/hvm/viridian.h>
> >>>  #include <asm/msr.h>
> >>>  #include <asm/setup.h>
> >>> @@ -197,6 +198,18 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
> >>>          /* Not offered to guests. */
> >>>          goto gp_fault;
> >>>  
> >>> +    case MSR_IA32_FEATURE_CONTROL:
> >>> +        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) )
> >>> +            goto gp_fault;
> >> The MSR is available if:
> >>
> >> "If any one enumeration
> >> condition for defined bit
> >> field position greater than
> >> bit 0 holds."
> >>
> >> which for us means cp->basic.vmx || cp->feat.lmce at the moment, with
> >> perhaps some smx/sgx in the future.
> > I don't think there's a lmce cpu bit (seems to be signaled in
> > IA32_MCG_CAP[27] = 1), maybe I should just use vmce_has_lmce?
> >
> > if ( !cp->basic.vmx || !vmce_has_lmce(v) )
> >     goto gp_fault;
> 
> Ah yes, sorry.
> 
> Eventually that will be mp->mcg_cap.lmce, but use the predicate for now.
> 
> > Is it fine however to return a #GP if we don't provide any of those
> > features to guests, but the underlying CPU does support them?
> 
> Yes.  That is literally how the availability of the MSR specified by Intel.
> 
> Any code which doesn't follow those rules will find #GP even on some
> recent CPUs.
> 
> > That seems to be slightly different from how we currently handle reads
> > to FEATURE_CONTROL, since it will never fault on Intel (or compliant),
> > even if we just return with bit 0 set alone. The new approach seems
> > closer to what real hardware would do.
> 
> Don't fall into the trap of assuming that the current MSR behaviour is
> perfect, and something we wish to preserve. :)

Sure, I've pointed out the changes on the commit message, since the
behavior will be different now.

Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 4717e50d4a..f6657af923 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2980,13 +2980,7 @@  static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
     case MSR_IA32_DEBUGCTLMSR:
         __vmread(GUEST_IA32_DEBUGCTL, msr_content);
         break;
-    case MSR_IA32_FEATURE_CONTROL:
-        *msr_content = IA32_FEATURE_CONTROL_LOCK;
-        if ( vmce_has_lmce(curr) )
-            *msr_content |= IA32_FEATURE_CONTROL_LMCE_ON;
-        if ( nestedhvm_enabled(curr->domain) )
-            *msr_content |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
-        break;
+
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_VMFUNC:
         if ( !nvmx_msr_read_intercept(msr, msr_content) )
             goto gp_fault;
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index e84107ac7b..cc2f111a90 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -25,6 +25,7 @@ 
 #include <xen/sched.h>
 
 #include <asm/debugreg.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/viridian.h>
 #include <asm/msr.h>
 #include <asm/setup.h>
@@ -197,6 +198,18 @@  int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
         /* Not offered to guests. */
         goto gp_fault;
 
+    case MSR_IA32_FEATURE_CONTROL:
+        if ( !(cp->x86_vendor & X86_VENDOR_INTEL) )
+            goto gp_fault;
+
+        *val = IA32_FEATURE_CONTROL_LOCK;
+        if ( vmce_has_lmce(v) )
+            *val |= IA32_FEATURE_CONTROL_LMCE_ON;
+        if ( nestedhvm_enabled(d) )
+            *val |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
+        break;
+
+
     case MSR_IA32_PLATFORM_ID:
         if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ||
              !(boot_cpu_data.x86_vendor & X86_VENDOR_INTEL) )