diff mbox

[v2,11/30] xen/x86: Calculate maximum host and guest featuresets

Message ID 1454679743-18133-12-git-send-email-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrew Cooper Feb. 5, 2016, 1:42 p.m. UTC
All of this information will be used by the toolstack to make informed
levelling decisions for VMs, and by Xen to sanity check toolstack-provided
information.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
---
 xen/arch/x86/cpuid.c        | 152 ++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/setup.c        |   3 +
 xen/include/asm-x86/cpuid.h |  17 +++++
 3 files changed, 172 insertions(+)

Comments

Jan Beulich Feb. 15, 2016, 1:37 p.m. UTC | #1
>>> On 05.02.16 at 14:42, <andrew.cooper3@citrix.com> wrote:
> --- a/xen/arch/x86/cpuid.c
> +++ b/xen/arch/x86/cpuid.c
> @@ -1,13 +1,165 @@
>  #include <xen/lib.h>
>  #include <asm/cpuid.h>
> +#include <asm/hvm/hvm.h>
> +#include <asm/hvm/vmx/vmcs.h>
> +#include <asm/processor.h>
> +
> +#define COMMON_1D INIT_COMMON_FEATURES
>  
>  const uint32_t known_features[] = INIT_KNOWN_FEATURES;
>  const uint32_t inverted_features[] = INIT_INVERTED_FEATURES;
>  
> +static const uint32_t pv_featuremask[] = INIT_PV_FEATURES;
> +static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
> +static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;

Considering that calculate_featuresets() gets called from
__start_xen(), that function and some others as well as the
above could apparently all live in .init.* sections.

> +uint32_t __read_mostly raw_featureset[FSCAPINTS];
> +uint32_t __read_mostly host_featureset[FSCAPINTS];
> +uint32_t __read_mostly pv_featureset[FSCAPINTS];
> +uint32_t __read_mostly hvm_featureset[FSCAPINTS];
> +
> +static void sanitise_featureset(uint32_t *fs)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < FSCAPINTS; ++i )
> +    {
> +        /* Clamp to known mask. */
> +        fs[i] &= known_features[i];
> +    }
> +
> +    switch ( boot_cpu_data.x86_vendor )
> +    {
> +    case X86_VENDOR_INTEL:
> +        /* Intel clears the common bits in e1d. */
> +        fs[FEATURESET_e1d] &= ~COMMON_1D;
> +        break;
> +
> +    case X86_VENDOR_AMD:
> +        /* AMD duplicates the common bits between 1d and e1d. */
> +        fs[FEATURESET_e1d] = ((fs[FEATURESET_1d]  &  COMMON_1D) |
> +                              (fs[FEATURESET_e1d] & ~COMMON_1D));
> +        break;
> +    }

How is this meant to work with cross vendor migration?

> +static void calculate_raw_featureset(void)
> +{
> +    unsigned int i, max, tmp;
> +
> +    max = cpuid_eax(0);
> +
> +    if ( max >= 1 )
> +        cpuid(0x1, &tmp, &tmp,
> +              &raw_featureset[FEATURESET_1c],
> +              &raw_featureset[FEATURESET_1d]);
> +    if ( max >= 7 )
> +        cpuid_count(0x7, 0, &tmp,
> +                    &raw_featureset[FEATURESET_7b0],
> +                    &raw_featureset[FEATURESET_7c0],
> +                    &tmp);
> +    if ( max >= 0xd )
> +        cpuid_count(0xd, 1,
> +                    &raw_featureset[FEATURESET_Da1],
> +                    &tmp, &tmp, &tmp);
> +
> +    max = cpuid_eax(0x80000000);
> +    if ( max >= 0x80000001 )

I don't recall where it was that I recently stumbled across a similar
check, but this is dangerous: Instead of >= this should check that
the upper 16 bits equal 0x8000 and the lower ones are >= 1.

> +static void calculate_host_featureset(void)
> +{
> +    memcpy(host_featureset, boot_cpu_data.x86_capability,
> +           sizeof(host_featureset));
> +}

Why not simply

#define host_featureset boot_cpu_data.x86_capability

?

> +static void calculate_pv_featureset(void)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i )
> +        pv_featureset[i] = host_featureset[i] & pv_featuremask[i];

I think when two arrays are involved simply using FSCAPINTS
as the upper bound would be more appropriate (and shorter).

> +static void calculate_hvm_featureset(void)
> +{
> +    unsigned int i;
> +    const uint32_t *hvm_featuremask;
> +
> +    if ( !hvm_enabled )
> +        return;
> +
> +    hvm_featuremask = hvm_funcs.hap_supported ?
> +        hvm_hap_featuremask : hvm_shadow_featuremask;
> +
> +    for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
> +        hvm_featureset[i] = host_featureset[i] & hvm_featuremask[i];
> +
> +    /* Unconditionally claim to be able to set the hypervisor bit. */
> +    __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset);
> +
> +    /*
> +     * On AMD, PV guests are entirely unable to use 'sysenter' as Xen runs in
> +     * long mode (and init_amd() has cleared it out of host capabilities), but
> +     * HVM guests are able if running in protected mode.
> +     */
> +    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
> +         test_bit(X86_FEATURE_SEP, raw_featureset) )
> +        __set_bit(X86_FEATURE_SEP, hvm_featureset);
> +
> +    /*
> +     * With VT-x, some features are only supported by Xen if dedicated
> +     * hardware support is also available.
> +     */
> +    if ( cpu_has_vmx )
> +    {
> +        if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) ||
> +             !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) )
> +            __clear_bit(X86_FEATURE_MPX, hvm_featureset);
> +
> +        if ( !cpu_has_vmx_xsaves )
> +            __clear_bit(X86_FEATURE_XSAVES, hvm_featureset);
> +
> +        if ( !cpu_has_vmx_pcommit )
> +            __clear_bit(X86_FEATURE_PCOMMIT, hvm_featureset);
> +    }
> +
> +    sanitise_featureset(pv_featureset);

s/pv_/hvm_/ ?

>  static void __maybe_unused build_assertions(void)

While affecting an earlier patch - __init?

> --- a/xen/include/asm-x86/cpuid.h
> +++ b/xen/include/asm-x86/cpuid.h
> @@ -5,12 +5,29 @@
>  
>  #define FSCAPINTS FEATURESET_NR_ENTRIES
>  
> +#define FEATURESET_1d     0 /* 0x00000001.edx      */
> +#define FEATURESET_1c     1 /* 0x00000001.ecx      */
> +#define FEATURESET_e1d    2 /* 0x80000001.edx      */
> +#define FEATURESET_e1c    3 /* 0x80000001.ecx      */
> +#define FEATURESET_Da1    4 /* 0x0000000d:1.eax    */
> +#define FEATURESET_7b0    5 /* 0x00000007:0.ebx    */
> +#define FEATURESET_7c0    6 /* 0x00000007:0.ecx    */
> +#define FEATURESET_e7d    7 /* 0x80000007.edx      */
> +#define FEATURESET_e8b    8 /* 0x80000008.ebx      */
> +
>  #ifndef __ASSEMBLY__
>  #include <xen/types.h>
>  
>  extern const uint32_t known_features[FSCAPINTS];
>  extern const uint32_t inverted_features[FSCAPINTS];
>  
> +extern uint32_t raw_featureset[FSCAPINTS];
> +extern uint32_t host_featureset[FSCAPINTS];
> +extern uint32_t pv_featureset[FSCAPINTS];
> +extern uint32_t hvm_featureset[FSCAPINTS];

I wonder whether it wouldn't be better to make these accessible
only via function calls, with the functions returning pointers to
const, to avoid seducing people into fiddling with these from
outside cpuid.c.

Jan
Andrew Cooper Feb. 15, 2016, 2:57 p.m. UTC | #2
On 15/02/16 13:37, Jan Beulich wrote:
>>>> On 05.02.16 at 14:42, <andrew.cooper3@citrix.com> wrote:
>> --- a/xen/arch/x86/cpuid.c
>> +++ b/xen/arch/x86/cpuid.c
>> @@ -1,13 +1,165 @@
>>  #include <xen/lib.h>
>>  #include <asm/cpuid.h>
>> +#include <asm/hvm/hvm.h>
>> +#include <asm/hvm/vmx/vmcs.h>
>> +#include <asm/processor.h>
>> +
>> +#define COMMON_1D INIT_COMMON_FEATURES
>>  
>>  const uint32_t known_features[] = INIT_KNOWN_FEATURES;
>>  const uint32_t inverted_features[] = INIT_INVERTED_FEATURES;
>>  
>> +static const uint32_t pv_featuremask[] = INIT_PV_FEATURES;
>> +static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
>> +static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;
> Considering that calculate_featuresets() gets called from
> __start_xen(), that function and some others as well as the
> above could apparently all live in .init.* sections.

known and inverted features are used from the AP bringup path, so can't
be init.

calculate_featureset(), and these _featuremask[] arrays can be init.

>
>> +uint32_t __read_mostly raw_featureset[FSCAPINTS];
>> +uint32_t __read_mostly host_featureset[FSCAPINTS];
>> +uint32_t __read_mostly pv_featureset[FSCAPINTS];
>> +uint32_t __read_mostly hvm_featureset[FSCAPINTS];
>> +
>> +static void sanitise_featureset(uint32_t *fs)
>> +{
>> +    unsigned int i;
>> +
>> +    for ( i = 0; i < FSCAPINTS; ++i )
>> +    {
>> +        /* Clamp to known mask. */
>> +        fs[i] &= known_features[i];
>> +    }
>> +
>> +    switch ( boot_cpu_data.x86_vendor )
>> +    {
>> +    case X86_VENDOR_INTEL:
>> +        /* Intel clears the common bits in e1d. */
>> +        fs[FEATURESET_e1d] &= ~COMMON_1D;
>> +        break;
>> +
>> +    case X86_VENDOR_AMD:
>> +        /* AMD duplicates the common bits between 1d and e1d. */
>> +        fs[FEATURESET_e1d] = ((fs[FEATURESET_1d]  &  COMMON_1D) |
>> +                              (fs[FEATURESET_e1d] & ~COMMON_1D));
>> +        break;
>> +    }
> How is this meant to work with cross vendor migration?

I don't see how cross-vendor is relevant here.  This is about reporting
the hosts modified featureset accurately to the toolstack.

Once the deep dependency logic logic is inserted in here, it is possible
that some of the common features are modified, at which point their
shared nature on AMD needs re-calculating.

>
>> +static void calculate_raw_featureset(void)
>> +{
>> +    unsigned int i, max, tmp;
>> +
>> +    max = cpuid_eax(0);
>> +
>> +    if ( max >= 1 )
>> +        cpuid(0x1, &tmp, &tmp,
>> +              &raw_featureset[FEATURESET_1c],
>> +              &raw_featureset[FEATURESET_1d]);
>> +    if ( max >= 7 )
>> +        cpuid_count(0x7, 0, &tmp,
>> +                    &raw_featureset[FEATURESET_7b0],
>> +                    &raw_featureset[FEATURESET_7c0],
>> +                    &tmp);
>> +    if ( max >= 0xd )
>> +        cpuid_count(0xd, 1,
>> +                    &raw_featureset[FEATURESET_Da1],
>> +                    &tmp, &tmp, &tmp);
>> +
>> +    max = cpuid_eax(0x80000000);
>> +    if ( max >= 0x80000001 )
> I don't recall where it was that I recently stumbled across a similar
> check, but this is dangerous: Instead of >= this should check that
> the upper 16 bits equal 0x8000 and the lower ones are >= 1.

Ok.

>
>> +static void calculate_host_featureset(void)
>> +{
>> +    memcpy(host_featureset, boot_cpu_data.x86_capability,
>> +           sizeof(host_featureset));
>> +}
> Why not simply
>
> #define host_featureset boot_cpu_data.x86_capability

ARRAY_SIZE(host_featureset) changes, although I guess this won't matter
if I standardise on FSCAPINTS.

>
> ?
>
>> +static void calculate_pv_featureset(void)
>> +{
>> +    unsigned int i;
>> +
>> +    for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i )
>> +        pv_featureset[i] = host_featureset[i] & pv_featuremask[i];
> I think when two arrays are involved simply using FSCAPINTS
> as the upper bound would be more appropriate (and shorter).
>
>> +static void calculate_hvm_featureset(void)
>> +{
>> +    unsigned int i;
>> +    const uint32_t *hvm_featuremask;
>> +
>> +    if ( !hvm_enabled )
>> +        return;
>> +
>> +    hvm_featuremask = hvm_funcs.hap_supported ?
>> +        hvm_hap_featuremask : hvm_shadow_featuremask;
>> +
>> +    for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
>> +        hvm_featureset[i] = host_featureset[i] & hvm_featuremask[i];
>> +
>> +    /* Unconditionally claim to be able to set the hypervisor bit. */
>> +    __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset);
>> +
>> +    /*
>> +     * On AMD, PV guests are entirely unable to use 'sysenter' as Xen runs in
>> +     * long mode (and init_amd() has cleared it out of host capabilities), but
>> +     * HVM guests are able if running in protected mode.
>> +     */
>> +    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
>> +         test_bit(X86_FEATURE_SEP, raw_featureset) )
>> +        __set_bit(X86_FEATURE_SEP, hvm_featureset);
>> +
>> +    /*
>> +     * With VT-x, some features are only supported by Xen if dedicated
>> +     * hardware support is also available.
>> +     */
>> +    if ( cpu_has_vmx )
>> +    {
>> +        if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) ||
>> +             !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) )
>> +            __clear_bit(X86_FEATURE_MPX, hvm_featureset);
>> +
>> +        if ( !cpu_has_vmx_xsaves )
>> +            __clear_bit(X86_FEATURE_XSAVES, hvm_featureset);
>> +
>> +        if ( !cpu_has_vmx_pcommit )
>> +            __clear_bit(X86_FEATURE_PCOMMIT, hvm_featureset);
>> +    }
>> +
>> +    sanitise_featureset(pv_featureset);
> s/pv_/hvm_/ ?

Oops yes.

>
>>  static void __maybe_unused build_assertions(void)
> While affecting an earlier patch - __init?

Can do, but nothing from this actually gets emitted into a translation unit.

>
>> --- a/xen/include/asm-x86/cpuid.h
>> +++ b/xen/include/asm-x86/cpuid.h
>> @@ -5,12 +5,29 @@
>>  
>>  #define FSCAPINTS FEATURESET_NR_ENTRIES
>>  
>> +#define FEATURESET_1d     0 /* 0x00000001.edx      */
>> +#define FEATURESET_1c     1 /* 0x00000001.ecx      */
>> +#define FEATURESET_e1d    2 /* 0x80000001.edx      */
>> +#define FEATURESET_e1c    3 /* 0x80000001.ecx      */
>> +#define FEATURESET_Da1    4 /* 0x0000000d:1.eax    */
>> +#define FEATURESET_7b0    5 /* 0x00000007:0.ebx    */
>> +#define FEATURESET_7c0    6 /* 0x00000007:0.ecx    */
>> +#define FEATURESET_e7d    7 /* 0x80000007.edx      */
>> +#define FEATURESET_e8b    8 /* 0x80000008.ebx      */
>> +
>>  #ifndef __ASSEMBLY__
>>  #include <xen/types.h>
>>  
>>  extern const uint32_t known_features[FSCAPINTS];
>>  extern const uint32_t inverted_features[FSCAPINTS];
>>  
>> +extern uint32_t raw_featureset[FSCAPINTS];
>> +extern uint32_t host_featureset[FSCAPINTS];
>> +extern uint32_t pv_featureset[FSCAPINTS];
>> +extern uint32_t hvm_featureset[FSCAPINTS];
> I wonder whether it wouldn't be better to make these accessible
> only via function calls, with the functions returning pointers to
> const, to avoid seducing people into fiddling with these from
> outside cpuid.c.

That does make it more awkward to iterate over, although I suppose I
don't do too much of that outside of cpuid.c

Also, without LTO, the function calls can't actually be omitted.  I am
tempted to leave it as-is and rely on code review to catch miuses.

~Andrew
Jan Beulich Feb. 15, 2016, 3:07 p.m. UTC | #3
>>> On 15.02.16 at 15:57, <andrew.cooper3@citrix.com> wrote:
> On 15/02/16 13:37, Jan Beulich wrote:
>>>>> On 05.02.16 at 14:42, <andrew.cooper3@citrix.com> wrote:
>>> --- a/xen/arch/x86/cpuid.c
>>> +++ b/xen/arch/x86/cpuid.c
>>> @@ -1,13 +1,165 @@
>>>  #include <xen/lib.h>
>>>  #include <asm/cpuid.h>
>>> +#include <asm/hvm/hvm.h>
>>> +#include <asm/hvm/vmx/vmcs.h>
>>> +#include <asm/processor.h>
>>> +
>>> +#define COMMON_1D INIT_COMMON_FEATURES
>>>  
>>>  const uint32_t known_features[] = INIT_KNOWN_FEATURES;
>>>  const uint32_t inverted_features[] = INIT_INVERTED_FEATURES;
>>>  
>>> +static const uint32_t pv_featuremask[] = INIT_PV_FEATURES;
>>> +static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
>>> +static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;
>> Considering that calculate_featuresets() gets called from
>> __start_xen(), that function and some others as well as the
>> above could apparently all live in .init.* sections.
> 
> known and inverted features are used from the AP bringup path, so can't
> be init.

Of course. My comment was only about the additions done here.

>>> +uint32_t __read_mostly raw_featureset[FSCAPINTS];
>>> +uint32_t __read_mostly host_featureset[FSCAPINTS];
>>> +uint32_t __read_mostly pv_featureset[FSCAPINTS];
>>> +uint32_t __read_mostly hvm_featureset[FSCAPINTS];
>>> +
>>> +static void sanitise_featureset(uint32_t *fs)
>>> +{
>>> +    unsigned int i;
>>> +
>>> +    for ( i = 0; i < FSCAPINTS; ++i )
>>> +    {
>>> +        /* Clamp to known mask. */
>>> +        fs[i] &= known_features[i];
>>> +    }
>>> +
>>> +    switch ( boot_cpu_data.x86_vendor )
>>> +    {
>>> +    case X86_VENDOR_INTEL:
>>> +        /* Intel clears the common bits in e1d. */
>>> +        fs[FEATURESET_e1d] &= ~COMMON_1D;
>>> +        break;
>>> +
>>> +    case X86_VENDOR_AMD:
>>> +        /* AMD duplicates the common bits between 1d and e1d. */
>>> +        fs[FEATURESET_e1d] = ((fs[FEATURESET_1d]  &  COMMON_1D) |
>>> +                              (fs[FEATURESET_e1d] & ~COMMON_1D));
>>> +        break;
>>> +    }
>> How is this meant to work with cross vendor migration?
> 
> I don't see how cross-vendor is relevant here.  This is about reporting
> the hosts modified featureset accurately to the toolstack.

I.e. you're not later going to use what you generate here to also
massage (or at least validate) what guests are going to see?

>>> +static void calculate_host_featureset(void)
>>> +{
>>> +    memcpy(host_featureset, boot_cpu_data.x86_capability,
>>> +           sizeof(host_featureset));
>>> +}
>> Why not simply
>>
>> #define host_featureset boot_cpu_data.x86_capability
> 
> ARRAY_SIZE(host_featureset) changes, although I guess this won't matter
> if I standardise on FSCAPINTS.

Right, since boot_cpu_data.x86_capability[] cannot, according to
earlier patches, have less than FSCAPINTS elements.

>>>  static void __maybe_unused build_assertions(void)
>> While affecting an earlier patch - __init?
> 
> Can do, but nothing from this actually gets emitted into a translation unit.

With the few(?) compiler versions you managed to test against, I
suppose...

>>> --- a/xen/include/asm-x86/cpuid.h
>>> +++ b/xen/include/asm-x86/cpuid.h
>>> @@ -5,12 +5,29 @@
>>>  
>>>  #define FSCAPINTS FEATURESET_NR_ENTRIES
>>>  
>>> +#define FEATURESET_1d     0 /* 0x00000001.edx      */
>>> +#define FEATURESET_1c     1 /* 0x00000001.ecx      */
>>> +#define FEATURESET_e1d    2 /* 0x80000001.edx      */
>>> +#define FEATURESET_e1c    3 /* 0x80000001.ecx      */
>>> +#define FEATURESET_Da1    4 /* 0x0000000d:1.eax    */
>>> +#define FEATURESET_7b0    5 /* 0x00000007:0.ebx    */
>>> +#define FEATURESET_7c0    6 /* 0x00000007:0.ecx    */
>>> +#define FEATURESET_e7d    7 /* 0x80000007.edx      */
>>> +#define FEATURESET_e8b    8 /* 0x80000008.ebx      */
>>> +
>>>  #ifndef __ASSEMBLY__
>>>  #include <xen/types.h>
>>>  
>>>  extern const uint32_t known_features[FSCAPINTS];
>>>  extern const uint32_t inverted_features[FSCAPINTS];
>>>  
>>> +extern uint32_t raw_featureset[FSCAPINTS];
>>> +extern uint32_t host_featureset[FSCAPINTS];
>>> +extern uint32_t pv_featureset[FSCAPINTS];
>>> +extern uint32_t hvm_featureset[FSCAPINTS];
>> I wonder whether it wouldn't be better to make these accessible
>> only via function calls, with the functions returning pointers to
>> const, to avoid seducing people into fiddling with these from
>> outside cpuid.c.
> 
> That does make it more awkward to iterate over, although I suppose I
> don't do too much of that outside of cpuid.c
> 
> Also, without LTO, the function calls can't actually be omitted.  I am
> tempted to leave it as-is and rely on code review to catch miuses.

Okay then.

Jan
Andrew Cooper Feb. 15, 2016, 3:52 p.m. UTC | #4
On 15/02/16 15:07, Jan Beulich wrote:
>
>>>> +uint32_t __read_mostly raw_featureset[FSCAPINTS];
>>>> +uint32_t __read_mostly host_featureset[FSCAPINTS];
>>>> +uint32_t __read_mostly pv_featureset[FSCAPINTS];
>>>> +uint32_t __read_mostly hvm_featureset[FSCAPINTS];
>>>> +
>>>> +static void sanitise_featureset(uint32_t *fs)
>>>> +{
>>>> +    unsigned int i;
>>>> +
>>>> +    for ( i = 0; i < FSCAPINTS; ++i )
>>>> +    {
>>>> +        /* Clamp to known mask. */
>>>> +        fs[i] &= known_features[i];
>>>> +    }
>>>> +
>>>> +    switch ( boot_cpu_data.x86_vendor )
>>>> +    {
>>>> +    case X86_VENDOR_INTEL:
>>>> +        /* Intel clears the common bits in e1d. */
>>>> +        fs[FEATURESET_e1d] &= ~COMMON_1D;
>>>> +        break;
>>>> +
>>>> +    case X86_VENDOR_AMD:
>>>> +        /* AMD duplicates the common bits between 1d and e1d. */
>>>> +        fs[FEATURESET_e1d] = ((fs[FEATURESET_1d]  &  COMMON_1D) |
>>>> +                              (fs[FEATURESET_e1d] & ~COMMON_1D));
>>>> +        break;
>>>> +    }
>>> How is this meant to work with cross vendor migration?
>> I don't see how cross-vendor is relevant here.  This is about reporting
>> the hosts modified featureset accurately to the toolstack.
> I.e. you're not later going to use what you generate here to also
> massage (or at least validate) what guests are going to see?

Oh right - as currently implemented, this will clobber features on an
Intel host attempting to emulate AMD through cross-vendor.

I will reconsider the logic in v3.  This isn't trivial to fix,
especially given that we don't yet have per-domain policies.

~Andrew
diff mbox

Patch

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 30a3392..1af0e6c 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -1,13 +1,165 @@ 
 #include <xen/lib.h>
 #include <asm/cpuid.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/processor.h>
+
+#define COMMON_1D INIT_COMMON_FEATURES
 
 const uint32_t known_features[] = INIT_KNOWN_FEATURES;
 const uint32_t inverted_features[] = INIT_INVERTED_FEATURES;
 
+static const uint32_t pv_featuremask[] = INIT_PV_FEATURES;
+static const uint32_t hvm_shadow_featuremask[] = INIT_HVM_SHADOW_FEATURES;
+static const uint32_t hvm_hap_featuremask[] = INIT_HVM_HAP_FEATURES;
+
+uint32_t __read_mostly raw_featureset[FSCAPINTS];
+uint32_t __read_mostly host_featureset[FSCAPINTS];
+uint32_t __read_mostly pv_featureset[FSCAPINTS];
+uint32_t __read_mostly hvm_featureset[FSCAPINTS];
+
+static void sanitise_featureset(uint32_t *fs)
+{
+    unsigned int i;
+
+    for ( i = 0; i < FSCAPINTS; ++i )
+    {
+        /* Clamp to known mask. */
+        fs[i] &= known_features[i];
+    }
+
+    switch ( boot_cpu_data.x86_vendor )
+    {
+    case X86_VENDOR_INTEL:
+        /* Intel clears the common bits in e1d. */
+        fs[FEATURESET_e1d] &= ~COMMON_1D;
+        break;
+
+    case X86_VENDOR_AMD:
+        /* AMD duplicates the common bits between 1d and e1d. */
+        fs[FEATURESET_e1d] = ((fs[FEATURESET_1d]  &  COMMON_1D) |
+                              (fs[FEATURESET_e1d] & ~COMMON_1D));
+        break;
+    }
+}
+
+static void calculate_raw_featureset(void)
+{
+    unsigned int i, max, tmp;
+
+    max = cpuid_eax(0);
+
+    if ( max >= 1 )
+        cpuid(0x1, &tmp, &tmp,
+              &raw_featureset[FEATURESET_1c],
+              &raw_featureset[FEATURESET_1d]);
+    if ( max >= 7 )
+        cpuid_count(0x7, 0, &tmp,
+                    &raw_featureset[FEATURESET_7b0],
+                    &raw_featureset[FEATURESET_7c0],
+                    &tmp);
+    if ( max >= 0xd )
+        cpuid_count(0xd, 1,
+                    &raw_featureset[FEATURESET_Da1],
+                    &tmp, &tmp, &tmp);
+
+    max = cpuid_eax(0x80000000);
+    if ( max >= 0x80000001 )
+        cpuid(0x80000001, &tmp, &tmp,
+              &raw_featureset[FEATURESET_e1c],
+              &raw_featureset[FEATURESET_e1d]);
+    if ( max >= 0x80000007 )
+        cpuid(0x80000007, &tmp, &tmp, &tmp,
+              &raw_featureset[FEATURESET_e7d]);
+    if ( max >= 0x80000008 )
+        cpuid(0x80000008, &tmp,
+              &raw_featureset[FEATURESET_e8b],
+              &tmp, &tmp);
+
+    for ( i = 0; i < ARRAY_SIZE(raw_featureset); ++i )
+        raw_featureset[i] ^= inverted_features[i];
+}
+
+static void calculate_host_featureset(void)
+{
+    memcpy(host_featureset, boot_cpu_data.x86_capability,
+           sizeof(host_featureset));
+}
+
+static void calculate_pv_featureset(void)
+{
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i )
+        pv_featureset[i] = host_featureset[i] & pv_featuremask[i];
+
+    /* Unconditionally claim to be able to set the hypervisor bit. */
+    __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset);
+
+    sanitise_featureset(pv_featureset);
+}
+
+static void calculate_hvm_featureset(void)
+{
+    unsigned int i;
+    const uint32_t *hvm_featuremask;
+
+    if ( !hvm_enabled )
+        return;
+
+    hvm_featuremask = hvm_funcs.hap_supported ?
+        hvm_hap_featuremask : hvm_shadow_featuremask;
+
+    for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
+        hvm_featureset[i] = host_featureset[i] & hvm_featuremask[i];
+
+    /* Unconditionally claim to be able to set the hypervisor bit. */
+    __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset);
+
+    /*
+     * On AMD, PV guests are entirely unable to use 'sysenter' as Xen runs in
+     * long mode (and init_amd() has cleared it out of host capabilities), but
+     * HVM guests are able if running in protected mode.
+     */
+    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+         test_bit(X86_FEATURE_SEP, raw_featureset) )
+        __set_bit(X86_FEATURE_SEP, hvm_featureset);
+
+    /*
+     * With VT-x, some features are only supported by Xen if dedicated
+     * hardware support is also available.
+     */
+    if ( cpu_has_vmx )
+    {
+        if ( !(vmx_vmexit_control & VM_EXIT_CLEAR_BNDCFGS) ||
+             !(vmx_vmentry_control & VM_ENTRY_LOAD_BNDCFGS) )
+            __clear_bit(X86_FEATURE_MPX, hvm_featureset);
+
+        if ( !cpu_has_vmx_xsaves )
+            __clear_bit(X86_FEATURE_XSAVES, hvm_featureset);
+
+        if ( !cpu_has_vmx_pcommit )
+            __clear_bit(X86_FEATURE_PCOMMIT, hvm_featureset);
+    }
+
+    sanitise_featureset(pv_featureset);
+}
+
+void calculate_featuresets(void)
+{
+    calculate_raw_featureset();
+    calculate_host_featureset();
+    calculate_pv_featureset();
+    calculate_hvm_featureset();
+}
+
 static void __maybe_unused build_assertions(void)
 {
     BUILD_BUG_ON(ARRAY_SIZE(known_features) != FSCAPINTS);
     BUILD_BUG_ON(ARRAY_SIZE(inverted_features) != FSCAPINTS);
+    BUILD_BUG_ON(ARRAY_SIZE(pv_featuremask) != FSCAPINTS);
+    BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow_featuremask) != FSCAPINTS);
+    BUILD_BUG_ON(ARRAY_SIZE(hvm_hap_featuremask) != FSCAPINTS);
 }
 
 /*
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 76c7b0f..50e4e51 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -50,6 +50,7 @@ 
 #include <asm/nmi.h>
 #include <asm/alternative.h>
 #include <asm/mc146818rtc.h>
+#include <asm/cpuid.h>
 
 /* opt_nosmp: If true, secondary processors are ignored. */
 static bool_t __initdata opt_nosmp;
@@ -1437,6 +1438,8 @@  void __init noreturn __start_xen(unsigned long mbi_p)
                "Multiple initrd candidates, picking module #%u\n",
                initrdidx);
 
+    calculate_featuresets();
+
     /*
      * Temporarily clear SMAP in CR4 to allow user-accesses in construct_dom0().
      * This saves a large number of corner cases interactions with
diff --git a/xen/include/asm-x86/cpuid.h b/xen/include/asm-x86/cpuid.h
index 341dbc1..18ba95b 100644
--- a/xen/include/asm-x86/cpuid.h
+++ b/xen/include/asm-x86/cpuid.h
@@ -5,12 +5,29 @@ 
 
 #define FSCAPINTS FEATURESET_NR_ENTRIES
 
+#define FEATURESET_1d     0 /* 0x00000001.edx      */
+#define FEATURESET_1c     1 /* 0x00000001.ecx      */
+#define FEATURESET_e1d    2 /* 0x80000001.edx      */
+#define FEATURESET_e1c    3 /* 0x80000001.ecx      */
+#define FEATURESET_Da1    4 /* 0x0000000d:1.eax    */
+#define FEATURESET_7b0    5 /* 0x00000007:0.ebx    */
+#define FEATURESET_7c0    6 /* 0x00000007:0.ecx    */
+#define FEATURESET_e7d    7 /* 0x80000007.edx      */
+#define FEATURESET_e8b    8 /* 0x80000008.ebx      */
+
 #ifndef __ASSEMBLY__
 #include <xen/types.h>
 
 extern const uint32_t known_features[FSCAPINTS];
 extern const uint32_t inverted_features[FSCAPINTS];
 
+extern uint32_t raw_featureset[FSCAPINTS];
+extern uint32_t host_featureset[FSCAPINTS];
+extern uint32_t pv_featureset[FSCAPINTS];
+extern uint32_t hvm_featureset[FSCAPINTS];
+
+void calculate_featuresets(void);
+
 #endif /* __ASSEMBLY__ */
 #endif /* !__X86_CPUID_H__ */