diff mbox series

x86/cpu-policy: Extend the guest max policy max leaf/subleaves

Message ID 20241029175505.2698661-1-andrew.cooper3@citrix.com (mailing list archive)
State New
Headers show
Series x86/cpu-policy: Extend the guest max policy max leaf/subleaves | expand

Commit Message

Andrew Cooper Oct. 29, 2024, 5:55 p.m. UTC
We already have one migration case opencoded (feat.max_subleaf).  A more
recent discovery is that we advertise x2APIC to guests without ensuring that
we provide max_leaf >= 0xb.

In general, any leaf known to Xen can be safely configured by the toolstack if
it doesn't violate other constraints.

Therefore, introduce guest_common_{max,default}_leaves() to generalise the
special case we currently have for feat.max_subleaf, in preparation to be able
to provide x2APIC topology in leaf 0xb even on older hardware.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Alejandro Vallejo <alejandro.vallejo@cloud.com>

On a KabyLake I have to hand, here's the delta in what xen-cpuid -p reports:

  git diff --no-index xen-cpuid-p-{before,after}.log
  diff --git a/xen-cpuid-p-before.log b/xen-cpuid-p-after.log
  index 5a76d05..24e22be 100644
  --- a/xen-cpuid-p-before.log
  +++ b/xen-cpuid-p-after.log
  @@ -61,7 +61,7 @@ Host policy: 33 leaves, 2 MSRs
     index    -> value
     000000ce -> 0000000080000000
     0000010a -> 000000000e000c04
  -PV Max policy: 33 leaves, 2 MSRs
  +PV Max policy: 58 leaves, 2 MSRs
    CPUID:
     leaf     subleaf  -> eax      ebx      ecx      edx
     00000000:ffffffff -> 0000000d:756e6547:6c65746e:49656e69
  @@ -75,7 +75,7 @@ PV Max policy: 33 leaves, 2 MSRs
     0000000d:00000000 -> 00000007:00000000:00000340:00000000
     0000000d:00000001 -> 00000007:00000000:00000000:00000000
     0000000d:00000002 -> 00000100:00000240:00000000:00000000
  -  80000000:ffffffff -> 80000008:00000000:00000000:00000000
  +  80000000:ffffffff -> 80000021:00000000:00000000:00000000
     80000001:ffffffff -> 00000000:00000000:00000123:28100800
     80000002:ffffffff -> 65746e49:2952286c:6f655820:2952286e
     80000003:ffffffff -> 55504320:2d334520:30333231:20367620
  @@ -87,7 +87,7 @@ PV Max policy: 33 leaves, 2 MSRs
     index    -> value
     000000ce -> 0000000080000000
     0000010a -> 000000001c020004
  -HVM Max policy: 35 leaves, 2 MSRs
  +HVM Max policy: 60 leaves, 2 MSRs
    CPUID:
     leaf     subleaf  -> eax      ebx      ecx      edx
     00000000:ffffffff -> 0000000d:756e6547:6c65746e:49656e69
  @@ -103,7 +103,7 @@ HVM Max policy: 35 leaves, 2 MSRs
     0000000d:00000002 -> 00000100:00000240:00000000:00000000
     0000000d:00000003 -> 00000040:000003c0:00000000:00000000
     0000000d:00000004 -> 00000040:00000400:00000000:00000000
  -  80000000:ffffffff -> 80000008:00000000:00000000:00000000
  +  80000000:ffffffff -> 80000021:00000000:00000000:00000000
     80000001:ffffffff -> 00000000:00000000:00000123:2c100800
     80000002:ffffffff -> 65746e49:2952286c:6f655820:2952286e
     80000003:ffffffff -> 55504320:2d334520:30333231:20367620
---
 xen/arch/x86/cpu-policy.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)


base-commit: 56bd76925ec35085528d778e46123b9d10a66018

Comments

Alejandro Vallejo Oct. 29, 2024, 6:26 p.m. UTC | #1
On Tue Oct 29, 2024 at 5:55 PM GMT, Andrew Cooper wrote:
> We already have one migration case opencoded (feat.max_subleaf).  A more
> recent discovery is that we advertise x2APIC to guests without ensuring that
> we provide max_leaf >= 0xb.
>
> In general, any leaf known to Xen can be safely configured by the toolstack if
> it doesn't violate other constraints.
>
> Therefore, introduce guest_common_{max,default}_leaves() to generalise the
> special case we currently have for feat.max_subleaf, in preparation to be able
> to provide x2APIC topology in leaf 0xb even on older hardware.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>

Cheers,
Alejandro
Jan Beulich Oct. 30, 2024, 6:51 a.m. UTC | #2
On 29.10.2024 18:55, Andrew Cooper wrote:
> We already have one migration case opencoded (feat.max_subleaf).  A more
> recent discovery is that we advertise x2APIC to guests without ensuring that
> we provide max_leaf >= 0xb.
> 
> In general, any leaf known to Xen can be safely configured by the toolstack if
> it doesn't violate other constraints.
> 
> Therefore, introduce guest_common_{max,default}_leaves() to generalise the
> special case we currently have for feat.max_subleaf, in preparation to be able
> to provide x2APIC topology in leaf 0xb even on older hardware.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

I'll have to update the AMX logic accordingly (maybe also the AVX10 one).

I'd like to point out that this highlights a naming anomaly in
x86_cpu_policies_are_compatible(): update_domain_cpu_policy() passes in
the respective max policy as first argument. Imo the first parameter of
the function would better be named "max" there.

> --- a/xen/arch/x86/cpu-policy.c
> +++ b/xen/arch/x86/cpu-policy.c
> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>  }
>  
> +/*
> + * Guest max policies can have any max leaf/subleaf within bounds.
> + *
> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> + * - Some VMs we'd like to synthesise leaves not present on the host.
> + */
> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> +{
> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> +}
> +
> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> +{
> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> +}

Which sadly still leaves open how to suitably shrink the max values,
when they're larger than necessary (for the guest).

Jan
Roger Pau Monné Oct. 30, 2024, 8:59 a.m. UTC | #3
On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> We already have one migration case opencoded (feat.max_subleaf).  A more
> recent discovery is that we advertise x2APIC to guests without ensuring that
> we provide max_leaf >= 0xb.
> 
> In general, any leaf known to Xen can be safely configured by the toolstack if
> it doesn't violate other constraints.
> 
> Therefore, introduce guest_common_{max,default}_leaves() to generalise the
> special case we currently have for feat.max_subleaf, in preparation to be able
> to provide x2APIC topology in leaf 0xb even on older hardware.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Alejandro Vallejo <alejandro.vallejo@cloud.com>
> 
> On a KabyLake I have to hand, here's the delta in what xen-cpuid -p reports:
> 
>   git diff --no-index xen-cpuid-p-{before,after}.log
>   diff --git a/xen-cpuid-p-before.log b/xen-cpuid-p-after.log
>   index 5a76d05..24e22be 100644
>   --- a/xen-cpuid-p-before.log
>   +++ b/xen-cpuid-p-after.log
>   @@ -61,7 +61,7 @@ Host policy: 33 leaves, 2 MSRs
>      index    -> value
>      000000ce -> 0000000080000000
>      0000010a -> 000000000e000c04
>   -PV Max policy: 33 leaves, 2 MSRs
>   +PV Max policy: 58 leaves, 2 MSRs
>     CPUID:
>      leaf     subleaf  -> eax      ebx      ecx      edx
>      00000000:ffffffff -> 0000000d:756e6547:6c65746e:49656e69
>   @@ -75,7 +75,7 @@ PV Max policy: 33 leaves, 2 MSRs
>      0000000d:00000000 -> 00000007:00000000:00000340:00000000
>      0000000d:00000001 -> 00000007:00000000:00000000:00000000
>      0000000d:00000002 -> 00000100:00000240:00000000:00000000
>   -  80000000:ffffffff -> 80000008:00000000:00000000:00000000
>   +  80000000:ffffffff -> 80000021:00000000:00000000:00000000
>      80000001:ffffffff -> 00000000:00000000:00000123:28100800
>      80000002:ffffffff -> 65746e49:2952286c:6f655820:2952286e
>      80000003:ffffffff -> 55504320:2d334520:30333231:20367620
>   @@ -87,7 +87,7 @@ PV Max policy: 33 leaves, 2 MSRs
>      index    -> value
>      000000ce -> 0000000080000000
>      0000010a -> 000000001c020004
>   -HVM Max policy: 35 leaves, 2 MSRs
>   +HVM Max policy: 60 leaves, 2 MSRs
>     CPUID:
>      leaf     subleaf  -> eax      ebx      ecx      edx
>      00000000:ffffffff -> 0000000d:756e6547:6c65746e:49656e69
>   @@ -103,7 +103,7 @@ HVM Max policy: 35 leaves, 2 MSRs
>      0000000d:00000002 -> 00000100:00000240:00000000:00000000
>      0000000d:00000003 -> 00000040:000003c0:00000000:00000000
>      0000000d:00000004 -> 00000040:00000400:00000000:00000000
>   -  80000000:ffffffff -> 80000008:00000000:00000000:00000000
>   +  80000000:ffffffff -> 80000021:00000000:00000000:00000000
>      80000001:ffffffff -> 00000000:00000000:00000123:2c100800
>      80000002:ffffffff -> 65746e49:2952286c:6f655820:2952286e
>      80000003:ffffffff -> 55504320:2d334520:30333231:20367620
> ---
>  xen/arch/x86/cpu-policy.c | 39 +++++++++++++++++++++++++--------------
>  1 file changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> index b6d9fad56773..78bc9872b09a 100644
> --- a/xen/arch/x86/cpu-policy.c
> +++ b/xen/arch/x86/cpu-policy.c
> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>  }
>  
> +/*
> + * Guest max policies can have any max leaf/subleaf within bounds.
> + *
> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> + * - Some VMs we'd like to synthesise leaves not present on the host.
> + */
> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> +{
> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> +}
> +
> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> +{
> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> +}

I think this what I'm going to ask is future work.  After the
modifications done to the host policy by max functions
(calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
better be done taking into account the contents of the policy, rather
than capping to the host values?

(note this comment is strictly for guest_common_default_leaves(), the
max version is fine using ARRAY_SIZE).

Thanks, Roger.
Andrew Cooper Oct. 30, 2024, 10:30 a.m. UTC | #4
On 30/10/2024 6:51 am, Jan Beulich wrote:
> On 29.10.2024 18:55, Andrew Cooper wrote:
>> We already have one migration case opencoded (feat.max_subleaf).  A more
>> recent discovery is that we advertise x2APIC to guests without ensuring that
>> we provide max_leaf >= 0xb.
>>
>> In general, any leaf known to Xen can be safely configured by the toolstack if
>> it doesn't violate other constraints.
>>
>> Therefore, introduce guest_common_{max,default}_leaves() to generalise the
>> special case we currently have for feat.max_subleaf, in preparation to be able
>> to provide x2APIC topology in leaf 0xb even on older hardware.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> I'll have to update the AMX logic accordingly (maybe also the AVX10 one).

Yeah - I need to get back to your shrinking series too.

> I'd like to point out that this highlights a naming anomaly in
> x86_cpu_policies_are_compatible(): update_domain_cpu_policy() passes in
> the respective max policy as first argument. Imo the first parameter of
> the function would better be named "max" there.

That's covered in the documentation.  It made sense when I first planned
things, but that was many many iterations ago.

>
>> --- a/xen/arch/x86/cpu-policy.c
>> +++ b/xen/arch/x86/cpu-policy.c
>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>  }
>>  
>> +/*
>> + * Guest max policies can have any max leaf/subleaf within bounds.
>> + *
>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>> + */
>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>> +{
>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>> +}
>> +
>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>> +{
>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>> +}
> Which sadly still leaves open how to suitably shrink the max values,
> when they're larger than necessary (for the guest).

Only the toolstack can do the shrinking, and only as the about the final
step after optional features have been activated.

~Andrew
Andrew Cooper Oct. 30, 2024, 10:39 a.m. UTC | #5
On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>> index b6d9fad56773..78bc9872b09a 100644
>> --- a/xen/arch/x86/cpu-policy.c
>> +++ b/xen/arch/x86/cpu-policy.c
>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>  }
>>  
>> +/*
>> + * Guest max policies can have any max leaf/subleaf within bounds.
>> + *
>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>> + */
>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>> +{
>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>> +}
>> +
>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>> +{
>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>> +}
> I think this what I'm going to ask is future work.  After the
> modifications done to the host policy by max functions
> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> better be done taking into account the contents of the policy, rather
> than capping to the host values?
>
> (note this comment is strictly for guest_common_default_leaves(), the
> max version is fine using ARRAY_SIZE).

I'm afraid I don't follow.

calculate_{pv,hvm}_max_policy() don't modify the host policy.

~Andrew
Roger Pau Monné Oct. 30, 2024, 11:03 a.m. UTC | #6
On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> > On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> >> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> >> index b6d9fad56773..78bc9872b09a 100644
> >> --- a/xen/arch/x86/cpu-policy.c
> >> +++ b/xen/arch/x86/cpu-policy.c
> >> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
> >>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
> >>  }
> >>  
> >> +/*
> >> + * Guest max policies can have any max leaf/subleaf within bounds.
> >> + *
> >> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> >> + * - Some VMs we'd like to synthesise leaves not present on the host.
> >> + */
> >> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> >> +{
> >> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> >> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> >> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> >> +}
> >> +
> >> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> >> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> >> +{
> >> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> >> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> >> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> >> +}
> > I think this what I'm going to ask is future work.  After the
> > modifications done to the host policy by max functions
> > (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> > better be done taking into account the contents of the policy, rather
> > than capping to the host values?
> >
> > (note this comment is strictly for guest_common_default_leaves(), the
> > max version is fine using ARRAY_SIZE).
> 
> I'm afraid I don't follow.
> 
> calculate_{pv,hvm}_max_policy() don't modify the host policy.

Hm, I don't think I've expressed myself clearly, sorry.  Let me try
again.

calculate_{hvm,pv}_max_policy() extends the host policy by possibly
setting new features, and such extended policy is then used as the
base for the PV/HVM default policies.

Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
having bits set past the max {sub,}leaf in the host policy, as it's
based in {hvm,pv}_def_cpu_policy that might have such bits set?

Thanks, Roger.
Andrew Cooper Oct. 30, 2024, 2:45 p.m. UTC | #7
On 30/10/2024 11:03 am, Roger Pau Monné wrote:
> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>>>> index b6d9fad56773..78bc9872b09a 100644
>>>> --- a/xen/arch/x86/cpu-policy.c
>>>> +++ b/xen/arch/x86/cpu-policy.c
>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>>>  }
>>>>  
>>>> +/*
>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
>>>> + *
>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>>>> + */
>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>>>> +{
>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>>>> +}
>>>> +
>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>>>> +{
>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>>>> +}
>>> I think this what I'm going to ask is future work.  After the
>>> modifications done to the host policy by max functions
>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
>>> better be done taking into account the contents of the policy, rather
>>> than capping to the host values?
>>>
>>> (note this comment is strictly for guest_common_default_leaves(), the
>>> max version is fine using ARRAY_SIZE).
>> I'm afraid I don't follow.
>>
>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
> again.
>
> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
> setting new features, and such extended policy is then used as the
> base for the PV/HVM default policies.
>
> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
> having bits set past the max {sub,}leaf in the host policy, as it's
> based in {hvm,pv}_def_cpu_policy that might have such bits set?

Oh, right.

This patch doesn't change anything WRT that.

But I think you're right that we do risk getting into that case (in
principle at least) because of how guest_common_*_feature_adjustment() work.

Furthermore, the bug will typically get hidden because we serialise
based on the max_leaf/subleaf, and will discard feature words outside of
the max_leaf/subleaf bounds.

I suppose we probably want a variation of x86_cpu_featureset_to_policy()
which extends the max_leaf/subleaf based on non-zero values in leaves. 
(This already feels like it's going to be an ugly algorithm.)

~Andrew
Roger Pau Monné Oct. 30, 2024, 3:13 p.m. UTC | #8
On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
> > On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
> >> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> >>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> >>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> >>>> index b6d9fad56773..78bc9872b09a 100644
> >>>> --- a/xen/arch/x86/cpu-policy.c
> >>>> +++ b/xen/arch/x86/cpu-policy.c
> >>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
> >>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
> >>>>  }
> >>>>  
> >>>> +/*
> >>>> + * Guest max policies can have any max leaf/subleaf within bounds.
> >>>> + *
> >>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> >>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
> >>>> + */
> >>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> >>>> +{
> >>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> >>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> >>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> >>>> +}
> >>>> +
> >>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> >>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> >>>> +{
> >>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> >>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> >>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> >>>> +}
> >>> I think this what I'm going to ask is future work.  After the
> >>> modifications done to the host policy by max functions
> >>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> >>> better be done taking into account the contents of the policy, rather
> >>> than capping to the host values?
> >>>
> >>> (note this comment is strictly for guest_common_default_leaves(), the
> >>> max version is fine using ARRAY_SIZE).
> >> I'm afraid I don't follow.
> >>
> >> calculate_{pv,hvm}_max_policy() don't modify the host policy.
> > Hm, I don't think I've expressed myself clearly, sorry.  Let me try
> > again.
> >
> > calculate_{hvm,pv}_max_policy() extends the host policy by possibly
> > setting new features, and such extended policy is then used as the
> > base for the PV/HVM default policies.
> >
> > Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
> > having bits set past the max {sub,}leaf in the host policy, as it's
> > based in {hvm,pv}_def_cpu_policy that might have such bits set?
> 
> Oh, right.
> 
> This patch doesn't change anything WRT that.

Indeed, didn't intend my comment to block it, just that I think at
some point the logic in guest_common_default_leaves() will need to be
expanded.

> But I think you're right that we do risk getting into that case (in
> principle at least) because of how guest_common_*_feature_adjustment() work.
> 
> Furthermore, the bug will typically get hidden because we serialise
> based on the max_leaf/subleaf, and will discard feature words outside of
> the max_leaf/subleaf bounds.

Yes, once we serialize it for toolstack consumption the leafs will be
implicitly zeroed.

> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
> which extends the max_leaf/subleaf based on non-zero values in leaves. 
> (This already feels like it's going to be an ugly algorithm.)

Hm, I was thinking that we would need to adjust
guest_common_default_leaves() to properly shrink the max {sub,}leaf
fields from the max policies.

Thanks, Roger.
Alejandro Vallejo Oct. 30, 2024, 3:59 p.m. UTC | #9
On Wed Oct 30, 2024 at 3:13 PM GMT, Roger Pau Monné wrote:
> On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
> > On 30/10/2024 11:03 am, Roger Pau Monné wrote:
> > > On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
> > >> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> > >>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> > >>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> > >>>> index b6d9fad56773..78bc9872b09a 100644
> > >>>> --- a/xen/arch/x86/cpu-policy.c
> > >>>> +++ b/xen/arch/x86/cpu-policy.c
> > >>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
> > >>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
> > >>>>  }
> > >>>>  
> > >>>> +/*
> > >>>> + * Guest max policies can have any max leaf/subleaf within bounds.
> > >>>> + *
> > >>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> > >>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
> > >>>> + */
> > >>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> > >>>> +{
> > >>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> > >>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> > >>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> > >>>> +}
> > >>>> +
> > >>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> > >>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> > >>>> +{
> > >>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> > >>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> > >>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> > >>>> +}
> > >>> I think this what I'm going to ask is future work.  After the
> > >>> modifications done to the host policy by max functions
> > >>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> > >>> better be done taking into account the contents of the policy, rather
> > >>> than capping to the host values?
> > >>>
> > >>> (note this comment is strictly for guest_common_default_leaves(), the
> > >>> max version is fine using ARRAY_SIZE).
> > >> I'm afraid I don't follow.
> > >>
> > >> calculate_{pv,hvm}_max_policy() don't modify the host policy.
> > > Hm, I don't think I've expressed myself clearly, sorry.  Let me try
> > > again.
> > >
> > > calculate_{hvm,pv}_max_policy() extends the host policy by possibly
> > > setting new features, and such extended policy is then used as the
> > > base for the PV/HVM default policies.
> > >
> > > Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
> > > having bits set past the max {sub,}leaf in the host policy, as it's
> > > based in {hvm,pv}_def_cpu_policy that might have such bits set?
> > 
> > Oh, right.
> > 
> > This patch doesn't change anything WRT that.
>
> Indeed, didn't intend my comment to block it, just that I think at
> some point the logic in guest_common_default_leaves() will need to be
> expanded.
>
> > But I think you're right that we do risk getting into that case (in
> > principle at least) because of how guest_common_*_feature_adjustment() work.
> > 
> > Furthermore, the bug will typically get hidden because we serialise
> > based on the max_leaf/subleaf, and will discard feature words outside of
> > the max_leaf/subleaf bounds.
>
> Yes, once we serialize it for toolstack consumption the leafs will be
> implicitly zeroed.
>
> > I suppose we probably want a variation of x86_cpu_featureset_to_policy()
> > which extends the max_leaf/subleaf based on non-zero values in leaves. 
> > (This already feels like it's going to be an ugly algorithm.)
>
> Hm, I was thinking that we would need to adjust
> guest_common_default_leaves() to properly shrink the max {sub,}leaf
> fields from the max policies.

That would be tricky in case we end up with subleafs that are strictly
populated at runtime. Xen would have no way of knowing whether that's meant to
be implemented or not. It seems safer to raise the max if we find a non-zero
leaves higher than the current max.

The algorithm is probably quite simple for static data, as it's merely
traversing the raw arrays and keeping track of the last non-zero leaf.

>
> Thanks, Roger.

Cheers,
Alejandro
Roger Pau Monné Oct. 30, 2024, 4:30 p.m. UTC | #10
On Wed, Oct 30, 2024 at 03:59:12PM +0000, Alejandro Vallejo wrote:
> On Wed Oct 30, 2024 at 3:13 PM GMT, Roger Pau Monné wrote:
> > On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
> > > On 30/10/2024 11:03 am, Roger Pau Monné wrote:
> > > > On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
> > > >> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> > > >>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> > > >>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> > > >>>> index b6d9fad56773..78bc9872b09a 100644
> > > >>>> --- a/xen/arch/x86/cpu-policy.c
> > > >>>> +++ b/xen/arch/x86/cpu-policy.c
> > > >>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
> > > >>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
> > > >>>>  }
> > > >>>>  
> > > >>>> +/*
> > > >>>> + * Guest max policies can have any max leaf/subleaf within bounds.
> > > >>>> + *
> > > >>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> > > >>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
> > > >>>> + */
> > > >>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> > > >>>> +{
> > > >>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> > > >>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> > > >>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> > > >>>> +}
> > > >>>> +
> > > >>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> > > >>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> > > >>>> +{
> > > >>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> > > >>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> > > >>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> > > >>>> +}
> > > >>> I think this what I'm going to ask is future work.  After the
> > > >>> modifications done to the host policy by max functions
> > > >>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> > > >>> better be done taking into account the contents of the policy, rather
> > > >>> than capping to the host values?
> > > >>>
> > > >>> (note this comment is strictly for guest_common_default_leaves(), the
> > > >>> max version is fine using ARRAY_SIZE).
> > > >> I'm afraid I don't follow.
> > > >>
> > > >> calculate_{pv,hvm}_max_policy() don't modify the host policy.
> > > > Hm, I don't think I've expressed myself clearly, sorry.  Let me try
> > > > again.
> > > >
> > > > calculate_{hvm,pv}_max_policy() extends the host policy by possibly
> > > > setting new features, and such extended policy is then used as the
> > > > base for the PV/HVM default policies.
> > > >
> > > > Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
> > > > having bits set past the max {sub,}leaf in the host policy, as it's
> > > > based in {hvm,pv}_def_cpu_policy that might have such bits set?
> > > 
> > > Oh, right.
> > > 
> > > This patch doesn't change anything WRT that.
> >
> > Indeed, didn't intend my comment to block it, just that I think at
> > some point the logic in guest_common_default_leaves() will need to be
> > expanded.
> >
> > > But I think you're right that we do risk getting into that case (in
> > > principle at least) because of how guest_common_*_feature_adjustment() work.
> > > 
> > > Furthermore, the bug will typically get hidden because we serialise
> > > based on the max_leaf/subleaf, and will discard feature words outside of
> > > the max_leaf/subleaf bounds.
> >
> > Yes, once we serialize it for toolstack consumption the leafs will be
> > implicitly zeroed.
> >
> > > I suppose we probably want a variation of x86_cpu_featureset_to_policy()
> > > which extends the max_leaf/subleaf based on non-zero values in leaves. 
> > > (This already feels like it's going to be an ugly algorithm.)
> >
> > Hm, I was thinking that we would need to adjust
> > guest_common_default_leaves() to properly shrink the max {sub,}leaf
> > fields from the max policies.
> 
> That would be tricky in case we end up with subleafs that are strictly
> populated at runtime.

Maybe we need a way to expose {sub,}leaf minimum value requirements in
the gen-cpuid.py logic, so we can tie minimum required {sub,}leaf
values to features?

I would like to think that those run-time populated leafs will be tied
to features, as to have a way to account for them.

> Xen would have no way of knowing whether that's meant to
> be implemented or not. It seems safer to raise the max if we find a non-zero
> leaves higher than the current max.

Raising might be better, TBH I didn't give the exact solution much
though.  But we need to be aware that setting it to the host value is
likely something we should look into fixing, otherwise subtle bugs
might occur.

Maybe add a comment to guest_common_default_leaves() in that regard?

Thanks, Roger.
Andrew Cooper Oct. 30, 2024, 4:51 p.m. UTC | #11
On 30/10/2024 3:13 pm, Roger Pau Monné wrote:
> On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
>> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
>>> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
>>>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
>>>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>>>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>>>>>> index b6d9fad56773..78bc9872b09a 100644
>>>>>> --- a/xen/arch/x86/cpu-policy.c
>>>>>> +++ b/xen/arch/x86/cpu-policy.c
>>>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>>>>>  }
>>>>>>  
>>>>>> +/*
>>>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
>>>>>> + *
>>>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>>>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>>>>>> + */
>>>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>>>>>> +{
>>>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>>>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>>>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>>>>>> +}
>>>>>> +
>>>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>>>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>>>>>> +{
>>>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>>>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>>>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>>>>>> +}
>>>>> I think this what I'm going to ask is future work.  After the
>>>>> modifications done to the host policy by max functions
>>>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
>>>>> better be done taking into account the contents of the policy, rather
>>>>> than capping to the host values?
>>>>>
>>>>> (note this comment is strictly for guest_common_default_leaves(), the
>>>>> max version is fine using ARRAY_SIZE).
>>>> I'm afraid I don't follow.
>>>>
>>>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
>>> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
>>> again.
>>>
>>> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
>>> setting new features, and such extended policy is then used as the
>>> base for the PV/HVM default policies.
>>>
>>> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
>>> having bits set past the max {sub,}leaf in the host policy, as it's
>>> based in {hvm,pv}_def_cpu_policy that might have such bits set?
>> Oh, right.
>>
>> This patch doesn't change anything WRT that.
> Indeed, didn't intend my comment to block it, just that I think at
> some point the logic in guest_common_default_leaves() will need to be
> expanded.
>
>> But I think you're right that we do risk getting into that case (in
>> principle at least) because of how guest_common_*_feature_adjustment() work.
>>
>> Furthermore, the bug will typically get hidden because we serialise
>> based on the max_leaf/subleaf, and will discard feature words outside of
>> the max_leaf/subleaf bounds.
> Yes, once we serialize it for toolstack consumption the leafs will be
> implicitly zeroed.
>
>> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
>> which extends the max_leaf/subleaf based on non-zero values in leaves. 
>> (This already feels like it's going to be an ugly algorithm.)
> Hm, I was thinking that we would need to adjust
> guest_common_default_leaves() to properly shrink the max {sub,}leaf
> fields from the max policies.

Hmm.  What we'd do is have default inherit max's ARRAY_SIZES(), then do
all the existing logic, then as the final step, shrink the default
policies, vaguely per Jan's plan.

i.e. we'd end up deleting guest_common_default_leaves()

That way we don't need to encode any knowledge of which feature bit
means what WRT max_leaf/subleaf.

~Andrew
Roger Pau Monné Oct. 30, 2024, 5:10 p.m. UTC | #12
On Wed, Oct 30, 2024 at 04:51:34PM +0000, Andrew Cooper wrote:
> On 30/10/2024 3:13 pm, Roger Pau Monné wrote:
> > On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
> >> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
> >>> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
> >>>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
> >>>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
> >>>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
> >>>>>> index b6d9fad56773..78bc9872b09a 100644
> >>>>>> --- a/xen/arch/x86/cpu-policy.c
> >>>>>> +++ b/xen/arch/x86/cpu-policy.c
> >>>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
> >>>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
> >>>>>>  }
> >>>>>>  
> >>>>>> +/*
> >>>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
> >>>>>> + *
> >>>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
> >>>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
> >>>>>> + */
> >>>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
> >>>>>> +{
> >>>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
> >>>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
> >>>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
> >>>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
> >>>>>> +{
> >>>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
> >>>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
> >>>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
> >>>>>> +}
> >>>>> I think this what I'm going to ask is future work.  After the
> >>>>> modifications done to the host policy by max functions
> >>>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
> >>>>> better be done taking into account the contents of the policy, rather
> >>>>> than capping to the host values?
> >>>>>
> >>>>> (note this comment is strictly for guest_common_default_leaves(), the
> >>>>> max version is fine using ARRAY_SIZE).
> >>>> I'm afraid I don't follow.
> >>>>
> >>>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
> >>> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
> >>> again.
> >>>
> >>> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
> >>> setting new features, and such extended policy is then used as the
> >>> base for the PV/HVM default policies.
> >>>
> >>> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
> >>> having bits set past the max {sub,}leaf in the host policy, as it's
> >>> based in {hvm,pv}_def_cpu_policy that might have such bits set?
> >> Oh, right.
> >>
> >> This patch doesn't change anything WRT that.
> > Indeed, didn't intend my comment to block it, just that I think at
> > some point the logic in guest_common_default_leaves() will need to be
> > expanded.
> >
> >> But I think you're right that we do risk getting into that case (in
> >> principle at least) because of how guest_common_*_feature_adjustment() work.
> >>
> >> Furthermore, the bug will typically get hidden because we serialise
> >> based on the max_leaf/subleaf, and will discard feature words outside of
> >> the max_leaf/subleaf bounds.
> > Yes, once we serialize it for toolstack consumption the leafs will be
> > implicitly zeroed.
> >
> >> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
> >> which extends the max_leaf/subleaf based on non-zero values in leaves. 
> >> (This already feels like it's going to be an ugly algorithm.)
> > Hm, I was thinking that we would need to adjust
> > guest_common_default_leaves() to properly shrink the max {sub,}leaf
> > fields from the max policies.
> 
> Hmm.  What we'd do is have default inherit max's ARRAY_SIZES(), then do
> all the existing logic, then as the final step, shrink the default
> policies, vaguely per Jan's plan.
> 
> i.e. we'd end up deleting guest_common_default_leaves()
> 
> That way we don't need to encode any knowledge of which feature bit
> means what WRT max_leaf/subleaf.

What about Alejandro's concern about runtime populated {sub,}leafs,
won't we risk shrinking too much if the last leaf intended to be kept
happens to be a fully runtime populated one?

Do we need some kind of special magic for fully run-time populated
leafs (like the topology ones IIRC?) in order to account for them when
doing those max calculations?

Thanks, Roger.
Andrew Cooper Oct. 30, 2024, 5:24 p.m. UTC | #13
On 30/10/2024 5:10 pm, Roger Pau Monné wrote:
> On Wed, Oct 30, 2024 at 04:51:34PM +0000, Andrew Cooper wrote:
>> On 30/10/2024 3:13 pm, Roger Pau Monné wrote:
>>> On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
>>>> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
>>>>> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
>>>>>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
>>>>>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>>>>>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>>>>>>>> index b6d9fad56773..78bc9872b09a 100644
>>>>>>>> --- a/xen/arch/x86/cpu-policy.c
>>>>>>>> +++ b/xen/arch/x86/cpu-policy.c
>>>>>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>>>>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +/*
>>>>>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
>>>>>>>> + *
>>>>>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>>>>>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>>>>>>>> + */
>>>>>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>>>>>>>> +{
>>>>>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>>>>>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>>>>>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>>>>>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>>>>>>>> +{
>>>>>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>>>>>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>>>>>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>>>>>>>> +}
>>>>>>> I think this what I'm going to ask is future work.  After the
>>>>>>> modifications done to the host policy by max functions
>>>>>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
>>>>>>> better be done taking into account the contents of the policy, rather
>>>>>>> than capping to the host values?
>>>>>>>
>>>>>>> (note this comment is strictly for guest_common_default_leaves(), the
>>>>>>> max version is fine using ARRAY_SIZE).
>>>>>> I'm afraid I don't follow.
>>>>>>
>>>>>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
>>>>> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
>>>>> again.
>>>>>
>>>>> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
>>>>> setting new features, and such extended policy is then used as the
>>>>> base for the PV/HVM default policies.
>>>>>
>>>>> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
>>>>> having bits set past the max {sub,}leaf in the host policy, as it's
>>>>> based in {hvm,pv}_def_cpu_policy that might have such bits set?
>>>> Oh, right.
>>>>
>>>> This patch doesn't change anything WRT that.
>>> Indeed, didn't intend my comment to block it, just that I think at
>>> some point the logic in guest_common_default_leaves() will need to be
>>> expanded.
>>>
>>>> But I think you're right that we do risk getting into that case (in
>>>> principle at least) because of how guest_common_*_feature_adjustment() work.
>>>>
>>>> Furthermore, the bug will typically get hidden because we serialise
>>>> based on the max_leaf/subleaf, and will discard feature words outside of
>>>> the max_leaf/subleaf bounds.
>>> Yes, once we serialize it for toolstack consumption the leafs will be
>>> implicitly zeroed.
>>>
>>>> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
>>>> which extends the max_leaf/subleaf based on non-zero values in leaves. 
>>>> (This already feels like it's going to be an ugly algorithm.)
>>> Hm, I was thinking that we would need to adjust
>>> guest_common_default_leaves() to properly shrink the max {sub,}leaf
>>> fields from the max policies.
>> Hmm.  What we'd do is have default inherit max's ARRAY_SIZES(), then do
>> all the existing logic, then as the final step, shrink the default
>> policies, vaguely per Jan's plan.
>>
>> i.e. we'd end up deleting guest_common_default_leaves()
>>
>> That way we don't need to encode any knowledge of which feature bit
>> means what WRT max_leaf/subleaf.
> What about Alejandro's concern about runtime populated {sub,}leafs,
> won't we risk shrinking too much if the last leaf intended to be kept
> happens to be a fully runtime populated one?
>
> Do we need some kind of special magic for fully run-time populated
> leafs (like the topology ones IIRC?) in order to account for them when
> doing those max calculations?

No.

Xen shrinks the default policies only, as part of calculating them on
boot, in order to make them look more plausible.

The toolstack shrinks the guest policy as part of domain construction.


In both cases, shrinking is probably the final action in curating the
policy, after all other "turns this on, turn that off" has been taken
into account.

~Andrew
Jan Beulich Oct. 31, 2024, 9:16 a.m. UTC | #14
On 30.10.2024 17:51, Andrew Cooper wrote:
> On 30/10/2024 3:13 pm, Roger Pau Monné wrote:
>> On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
>>> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
>>>> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
>>>>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
>>>>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>>>>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>>>>>>> index b6d9fad56773..78bc9872b09a 100644
>>>>>>> --- a/xen/arch/x86/cpu-policy.c
>>>>>>> +++ b/xen/arch/x86/cpu-policy.c
>>>>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>>>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>>>>>>  }
>>>>>>>  
>>>>>>> +/*
>>>>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
>>>>>>> + *
>>>>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>>>>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>>>>>>> + */
>>>>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>>>>>>> +{
>>>>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>>>>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>>>>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>>>>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>>>>>>> +{
>>>>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>>>>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>>>>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>>>>>>> +}
>>>>>> I think this what I'm going to ask is future work.  After the
>>>>>> modifications done to the host policy by max functions
>>>>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
>>>>>> better be done taking into account the contents of the policy, rather
>>>>>> than capping to the host values?
>>>>>>
>>>>>> (note this comment is strictly for guest_common_default_leaves(), the
>>>>>> max version is fine using ARRAY_SIZE).
>>>>> I'm afraid I don't follow.
>>>>>
>>>>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
>>>> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
>>>> again.
>>>>
>>>> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
>>>> setting new features, and such extended policy is then used as the
>>>> base for the PV/HVM default policies.
>>>>
>>>> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
>>>> having bits set past the max {sub,}leaf in the host policy, as it's
>>>> based in {hvm,pv}_def_cpu_policy that might have such bits set?
>>> Oh, right.
>>>
>>> This patch doesn't change anything WRT that.
>> Indeed, didn't intend my comment to block it, just that I think at
>> some point the logic in guest_common_default_leaves() will need to be
>> expanded.
>>
>>> But I think you're right that we do risk getting into that case (in
>>> principle at least) because of how guest_common_*_feature_adjustment() work.
>>>
>>> Furthermore, the bug will typically get hidden because we serialise
>>> based on the max_leaf/subleaf, and will discard feature words outside of
>>> the max_leaf/subleaf bounds.
>> Yes, once we serialize it for toolstack consumption the leafs will be
>> implicitly zeroed.
>>
>>> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
>>> which extends the max_leaf/subleaf based on non-zero values in leaves. 
>>> (This already feels like it's going to be an ugly algorithm.)
>> Hm, I was thinking that we would need to adjust
>> guest_common_default_leaves() to properly shrink the max {sub,}leaf
>> fields from the max policies.
> 
> Hmm.  What we'd do is have default inherit max's ARRAY_SIZES(), then do
> all the existing logic, then as the final step, shrink the default
> policies, vaguely per Jan's plan.

Yet, beyond what my present patch has, not below anything the tool stack
has asked for explicitly.

Jan
Jan Beulich Oct. 31, 2024, 9:20 a.m. UTC | #15
On 30.10.2024 18:10, Roger Pau Monné wrote:
> On Wed, Oct 30, 2024 at 04:51:34PM +0000, Andrew Cooper wrote:
>> On 30/10/2024 3:13 pm, Roger Pau Monné wrote:
>>> On Wed, Oct 30, 2024 at 02:45:19PM +0000, Andrew Cooper wrote:
>>>> On 30/10/2024 11:03 am, Roger Pau Monné wrote:
>>>>> On Wed, Oct 30, 2024 at 10:39:12AM +0000, Andrew Cooper wrote:
>>>>>> On 30/10/2024 8:59 am, Roger Pau Monné wrote:
>>>>>>> On Tue, Oct 29, 2024 at 05:55:05PM +0000, Andrew Cooper wrote:
>>>>>>>> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
>>>>>>>> index b6d9fad56773..78bc9872b09a 100644
>>>>>>>> --- a/xen/arch/x86/cpu-policy.c
>>>>>>>> +++ b/xen/arch/x86/cpu-policy.c
>>>>>>>> @@ -391,6 +391,27 @@ static void __init calculate_host_policy(void)
>>>>>>>>      p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +/*
>>>>>>>> + * Guest max policies can have any max leaf/subleaf within bounds.
>>>>>>>> + *
>>>>>>>> + * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
>>>>>>>> + * - Some VMs we'd like to synthesise leaves not present on the host.
>>>>>>>> + */
>>>>>>>> +static void __init guest_common_max_leaves(struct cpu_policy *p)
>>>>>>>> +{
>>>>>>>> +    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
>>>>>>>> +    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
>>>>>>>> +    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +/* Guest default policies inherit the host max leaf/subleaf settings. */
>>>>>>>> +static void __init guest_common_default_leaves(struct cpu_policy *p)
>>>>>>>> +{
>>>>>>>> +    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
>>>>>>>> +    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
>>>>>>>> +    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
>>>>>>>> +}
>>>>>>> I think this what I'm going to ask is future work.  After the
>>>>>>> modifications done to the host policy by max functions
>>>>>>> (calculate_{hvm,pv}_max_policy()) won't the max {sub,}leaf adjustments
>>>>>>> better be done taking into account the contents of the policy, rather
>>>>>>> than capping to the host values?
>>>>>>>
>>>>>>> (note this comment is strictly for guest_common_default_leaves(), the
>>>>>>> max version is fine using ARRAY_SIZE).
>>>>>> I'm afraid I don't follow.
>>>>>>
>>>>>> calculate_{pv,hvm}_max_policy() don't modify the host policy.
>>>>> Hm, I don't think I've expressed myself clearly, sorry.  Let me try
>>>>> again.
>>>>>
>>>>> calculate_{hvm,pv}_max_policy() extends the host policy by possibly
>>>>> setting new features, and such extended policy is then used as the
>>>>> base for the PV/HVM default policies.
>>>>>
>>>>> Won't the resulting policy in calculate_{hvm,pv}_def_policy() risks
>>>>> having bits set past the max {sub,}leaf in the host policy, as it's
>>>>> based in {hvm,pv}_def_cpu_policy that might have such bits set?
>>>> Oh, right.
>>>>
>>>> This patch doesn't change anything WRT that.
>>> Indeed, didn't intend my comment to block it, just that I think at
>>> some point the logic in guest_common_default_leaves() will need to be
>>> expanded.
>>>
>>>> But I think you're right that we do risk getting into that case (in
>>>> principle at least) because of how guest_common_*_feature_adjustment() work.
>>>>
>>>> Furthermore, the bug will typically get hidden because we serialise
>>>> based on the max_leaf/subleaf, and will discard feature words outside of
>>>> the max_leaf/subleaf bounds.
>>> Yes, once we serialize it for toolstack consumption the leafs will be
>>> implicitly zeroed.
>>>
>>>> I suppose we probably want a variation of x86_cpu_featureset_to_policy()
>>>> which extends the max_leaf/subleaf based on non-zero values in leaves. 
>>>> (This already feels like it's going to be an ugly algorithm.)
>>> Hm, I was thinking that we would need to adjust
>>> guest_common_default_leaves() to properly shrink the max {sub,}leaf
>>> fields from the max policies.
>>
>> Hmm.  What we'd do is have default inherit max's ARRAY_SIZES(), then do
>> all the existing logic, then as the final step, shrink the default
>> policies, vaguely per Jan's plan.
>>
>> i.e. we'd end up deleting guest_common_default_leaves()
>>
>> That way we don't need to encode any knowledge of which feature bit
>> means what WRT max_leaf/subleaf.
> 
> What about Alejandro's concern about runtime populated {sub,}leafs,
> won't we risk shrinking too much if the last leaf intended to be kept
> happens to be a fully runtime populated one?
> 
> Do we need some kind of special magic for fully run-time populated
> leafs (like the topology ones IIRC?) in order to account for them when
> doing those max calculations?

Contrary to Andrew's reply I think we will need to take runtime-populated
leaves into account specially, as you suggest. Just thinking of something
APIC-ID-like in a very high leaf, which (presumably) ought to be zero in
max/default. While keeping such fields at zero in max/default for external
exposure, filling them with a non-zero value at policy creation (maybe
simply their max value) might help keep the shrinking logic agnostic to
such special cases.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index b6d9fad56773..78bc9872b09a 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -391,6 +391,27 @@  static void __init calculate_host_policy(void)
     p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
 }
 
+/*
+ * Guest max policies can have any max leaf/subleaf within bounds.
+ *
+ * - Some incoming VMs have a larger-than-necessary feat max_subleaf.
+ * - Some VMs we'd like to synthesise leaves not present on the host.
+ */
+static void __init guest_common_max_leaves(struct cpu_policy *p)
+{
+    p->basic.max_leaf       = ARRAY_SIZE(p->basic.raw) - 1;
+    p->feat.max_subleaf     = ARRAY_SIZE(p->feat.raw) - 1;
+    p->extd.max_leaf        = 0x80000000U + ARRAY_SIZE(p->extd.raw) - 1;
+}
+
+/* Guest default policies inherit the host max leaf/subleaf settings. */
+static void __init guest_common_default_leaves(struct cpu_policy *p)
+{
+    p->basic.max_leaf       = host_cpu_policy.basic.max_leaf;
+    p->feat.max_subleaf     = host_cpu_policy.feat.max_subleaf;
+    p->extd.max_leaf        = host_cpu_policy.extd.max_leaf;
+}
+
 static void __init guest_common_max_feature_adjustments(uint32_t *fs)
 {
     if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
@@ -579,11 +600,7 @@  static void __init calculate_pv_max_policy(void)
 
     *p = host_cpu_policy;
 
-    /*
-     * Some VMs may have a larger-than-necessary feat max_subleaf.  Allow them
-     * to migrate in.
-     */
-    p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1;
+    guest_common_max_leaves(p);
 
     x86_cpu_policy_to_featureset(p, fs);
 
@@ -626,8 +643,7 @@  static void __init calculate_pv_def_policy(void)
 
     *p = pv_max_cpu_policy;
 
-    /* Default to the same max_subleaf as the host. */
-    p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf;
+    guest_common_default_leaves(p);
 
     x86_cpu_policy_to_featureset(p, fs);
 
@@ -666,11 +682,7 @@  static void __init calculate_hvm_max_policy(void)
 
     *p = host_cpu_policy;
 
-    /*
-     * Some VMs may have a larger-than-necessary feat max_subleaf.  Allow them
-     * to migrate in.
-     */
-    p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1;
+    guest_common_max_leaves(p);
 
     x86_cpu_policy_to_featureset(p, fs);
 
@@ -790,8 +802,7 @@  static void __init calculate_hvm_def_policy(void)
 
     *p = hvm_max_cpu_policy;
 
-    /* Default to the same max_subleaf as the host. */
-    p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf;
+    guest_common_default_leaves(p);
 
     x86_cpu_policy_to_featureset(p, fs);