Message ID | f51b54328a09c510c9320f1317c2da371ef16eb5.1715102098.git.alejandro.vallejo@cloud.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | x86: Expose consistent topology to guests | expand |
On Wed, May 08, 2024 at 01:39:27PM +0100, Alejandro Vallejo wrote: > Expose sensible topologies in leaf 0xb. At the moment it synthesises non-HT > systems, in line with the previous code intent. > > Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> > --- > v2: > * Zap the topology leaves of (pv/hvm)_(def/max)_policy rather than the host policy > --- > tools/libs/guest/xg_cpuid_x86.c | 62 +++++---------------------------- > xen/arch/x86/cpu-policy.c | 9 +++-- > 2 files changed, 15 insertions(+), 56 deletions(-) > > diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c > index 4453178100ad..8170769dbe43 100644 > --- a/tools/libs/guest/xg_cpuid_x86.c > +++ b/tools/libs/guest/xg_cpuid_x86.c > @@ -584,7 +584,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, > bool hvm; > xc_domaininfo_t di; > struct xc_cpu_policy *p = xc_cpu_policy_init(); > - unsigned int i, nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; > + unsigned int nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; > uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; > uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {}; > uint32_t len = ARRAY_SIZE(host_featureset); > @@ -727,59 +727,15 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, > } > else > { > - /* > - * Topology for HVM guests is entirely controlled by Xen. For now, we > - * hardcode APIC_ID = vcpu_id * 2 to give the illusion of no SMT. > - */ > - p->policy.basic.htt = true; > - p->policy.extd.cmp_legacy = false; > - > - /* > - * Leaf 1 EBX[23:16] is Maximum Logical Processors Per Package. > - * Update to reflect vLAPIC_ID = vCPU_ID * 2, but make sure to avoid > - * overflow. > - */ > - if ( !p->policy.basic.lppp ) > - p->policy.basic.lppp = 2; > - else if ( !(p->policy.basic.lppp & 0x80) ) > - p->policy.basic.lppp *= 2; > - > - switch ( p->policy.x86_vendor ) > + /* TODO: Expose the ability to choose a custom topology for HVM/PVH */ > + unsigned int threads_per_core = 1; > + unsigned int cores_per_pkg = di.max_vcpu_id + 1; Newline. > + rc = x86_topo_from_parts(&p->policy, threads_per_core, cores_per_pkg); I assume this generates the same topology as the current code, or will the population of the leaves be different in some way? > + if ( rc ) > { > - case X86_VENDOR_INTEL: > - for ( i = 0; (p->policy.cache.subleaf[i].type && > - i < ARRAY_SIZE(p->policy.cache.raw)); ++i ) > - { > - p->policy.cache.subleaf[i].cores_per_package = > - (p->policy.cache.subleaf[i].cores_per_package << 1) | 1; > - p->policy.cache.subleaf[i].threads_per_cache = 0; > - } > - break; > - > - case X86_VENDOR_AMD: > - case X86_VENDOR_HYGON: > - /* > - * Leaf 0x80000008 ECX[15:12] is ApicIdCoreSize. > - * Leaf 0x80000008 ECX[7:0] is NumberOfCores (minus one). > - * Update to reflect vLAPIC_ID = vCPU_ID * 2. But avoid > - * - overflow, > - * - going out of sync with leaf 1 EBX[23:16], > - * - incrementing ApicIdCoreSize when it's zero (which changes the > - * meaning of bits 7:0). > - * > - * UPDATE: I addition to avoiding overflow, some > - * proprietary operating systems have trouble with > - * apic_id_size values greater than 7. Limit the value to > - * 7 for now. > - */ > - if ( p->policy.extd.nc < 0x7f ) > - { > - if ( p->policy.extd.apic_id_size != 0 && p->policy.extd.apic_id_size < 0x7 ) > - p->policy.extd.apic_id_size++; > - > - p->policy.extd.nc = (p->policy.extd.nc << 1) | 1; > - } > - break; > + ERROR("Failed to generate topology: t/c=%u c/p=%u", > + threads_per_core, cores_per_pkg); Could you also print the error code? > + goto out; > } > } > > diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c > index 4b6d96276399..0ad871732ba0 100644 > --- a/xen/arch/x86/cpu-policy.c > +++ b/xen/arch/x86/cpu-policy.c > @@ -278,9 +278,6 @@ static void recalculate_misc(struct cpu_policy *p) > > p->basic.raw[0x8] = EMPTY_LEAF; > > - /* TODO: Rework topology logic. */ > - memset(p->topo.raw, 0, sizeof(p->topo.raw)); > - > p->basic.raw[0xc] = EMPTY_LEAF; > > p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; > @@ -621,6 +618,9 @@ static void __init calculate_pv_max_policy(void) > recalculate_xstate(p); > > p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ > + > + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ > + memset(p->topo.raw, 0, sizeof(p->topo.raw)); > } > > static void __init calculate_pv_def_policy(void) > @@ -773,6 +773,9 @@ static void __init calculate_hvm_max_policy(void) > > /* It's always possible to emulate CPUID faulting for HVM guests */ > p->platform_info.cpuid_faulting = true; > + > + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ Line length. /* Wipe host topology. Populated by toolstack. */ Would be OK IMO. Thanks, Roger. > + memset(p->topo.raw, 0, sizeof(p->topo.raw)); Note that currently the host policy also gets the topology leaves cleared, is it intended to not clear them anymore after this change? (as you only clear the leaves for the guest {max,def} policies) Thanks, Roger.
On 24/05/2024 09:58, Roger Pau Monné wrote: > On Wed, May 08, 2024 at 01:39:27PM +0100, Alejandro Vallejo wrote: >> Expose sensible topologies in leaf 0xb. At the moment it synthesises non-HT >> systems, in line with the previous code intent. >> >> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> >> --- >> v2: >> * Zap the topology leaves of (pv/hvm)_(def/max)_policy rather than the host policy >> --- >> tools/libs/guest/xg_cpuid_x86.c | 62 +++++---------------------------- >> xen/arch/x86/cpu-policy.c | 9 +++-- >> 2 files changed, 15 insertions(+), 56 deletions(-) >> >> diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c >> index 4453178100ad..8170769dbe43 100644 >> --- a/tools/libs/guest/xg_cpuid_x86.c >> +++ b/tools/libs/guest/xg_cpuid_x86.c >> @@ -584,7 +584,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, >> bool hvm; >> xc_domaininfo_t di; >> struct xc_cpu_policy *p = xc_cpu_policy_init(); >> - unsigned int i, nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; >> + unsigned int nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; >> uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; >> uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {}; >> uint32_t len = ARRAY_SIZE(host_featureset); >> @@ -727,59 +727,15 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, >> } >> else >> { >> - /* >> - * Topology for HVM guests is entirely controlled by Xen. For now, we >> - * hardcode APIC_ID = vcpu_id * 2 to give the illusion of no SMT. >> - */ >> - p->policy.basic.htt = true; >> - p->policy.extd.cmp_legacy = false; >> - >> - /* >> - * Leaf 1 EBX[23:16] is Maximum Logical Processors Per Package. >> - * Update to reflect vLAPIC_ID = vCPU_ID * 2, but make sure to avoid >> - * overflow. >> - */ >> - if ( !p->policy.basic.lppp ) >> - p->policy.basic.lppp = 2; >> - else if ( !(p->policy.basic.lppp & 0x80) ) >> - p->policy.basic.lppp *= 2; >> - >> - switch ( p->policy.x86_vendor ) >> + /* TODO: Expose the ability to choose a custom topology for HVM/PVH */ >> + unsigned int threads_per_core = 1; >> + unsigned int cores_per_pkg = di.max_vcpu_id + 1; > > Newline. ack > >> + rc = x86_topo_from_parts(&p->policy, threads_per_core, cores_per_pkg); > > I assume this generates the same topology as the current code, or will > the population of the leaves be different in some way? > The current code does not populate 0xb. This generates a topology consistent with the existing INTENDED topology. The actual APIC IDs will be different though (because there's no skipping of odd values). All the dance in patch 1 was to make this migrate-safe. The x2apic ID is stored in the lapic hidden regs so differences with previous behaviour don't matter. IOW, The differences are: * 0xb is exposed, whereas previously it wasn't * APIC IDs are compacted such that new_apicid=old_apicid/2 * There's also a cleanup of the murkier paths to put the right core counts in the right leaves (whereas previously it was bonkers) >> + if ( rc ) >> { >> - case X86_VENDOR_INTEL: >> - for ( i = 0; (p->policy.cache.subleaf[i].type && >> - i < ARRAY_SIZE(p->policy.cache.raw)); ++i ) >> - { >> - p->policy.cache.subleaf[i].cores_per_package = >> - (p->policy.cache.subleaf[i].cores_per_package << 1) | 1; >> - p->policy.cache.subleaf[i].threads_per_cache = 0; >> - } >> - break; >> - >> - case X86_VENDOR_AMD: >> - case X86_VENDOR_HYGON: >> - /* >> - * Leaf 0x80000008 ECX[15:12] is ApicIdCoreSize. >> - * Leaf 0x80000008 ECX[7:0] is NumberOfCores (minus one). >> - * Update to reflect vLAPIC_ID = vCPU_ID * 2. But avoid >> - * - overflow, >> - * - going out of sync with leaf 1 EBX[23:16], >> - * - incrementing ApicIdCoreSize when it's zero (which changes the >> - * meaning of bits 7:0). >> - * >> - * UPDATE: I addition to avoiding overflow, some >> - * proprietary operating systems have trouble with >> - * apic_id_size values greater than 7. Limit the value to >> - * 7 for now. >> - */ >> - if ( p->policy.extd.nc < 0x7f ) >> - { >> - if ( p->policy.extd.apic_id_size != 0 && p->policy.extd.apic_id_size < 0x7 ) >> - p->policy.extd.apic_id_size++; >> - >> - p->policy.extd.nc = (p->policy.extd.nc << 1) | 1; >> - } >> - break; >> + ERROR("Failed to generate topology: t/c=%u c/p=%u", >> + threads_per_core, cores_per_pkg); > > Could you also print the error code? Sure > >> + goto out; >> } >> } >> >> diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c >> index 4b6d96276399..0ad871732ba0 100644 >> --- a/xen/arch/x86/cpu-policy.c >> +++ b/xen/arch/x86/cpu-policy.c >> @@ -278,9 +278,6 @@ static void recalculate_misc(struct cpu_policy *p) >> >> p->basic.raw[0x8] = EMPTY_LEAF; >> >> - /* TODO: Rework topology logic. */ >> - memset(p->topo.raw, 0, sizeof(p->topo.raw)); >> - >> p->basic.raw[0xc] = EMPTY_LEAF; >> >> p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; >> @@ -621,6 +618,9 @@ static void __init calculate_pv_max_policy(void) >> recalculate_xstate(p); >> >> p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ >> + >> + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ >> + memset(p->topo.raw, 0, sizeof(p->topo.raw)); >> } >> >> static void __init calculate_pv_def_policy(void) >> @@ -773,6 +773,9 @@ static void __init calculate_hvm_max_policy(void) >> >> /* It's always possible to emulate CPUID faulting for HVM guests */ >> p->platform_info.cpuid_faulting = true; >> + >> + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ > > Line length. > > /* Wipe host topology. Populated by toolstack. */ > > Would be OK IMO. Ack > > Thanks, Roger. >> + memset(p->topo.raw, 0, sizeof(p->topo.raw)); > > Note that currently the host policy also gets the topology leaves > cleared, is it intended to not clear them anymore after this change? > > (as you only clear the leaves for the guest {max,def} policies) > > Thanks, Roger. It was like that originally in v1, I changed in v2 as part of feedback from Jan. Cheers, Alejandro
On Fri, May 24, 2024 at 06:16:01PM +0100, Alejandro Vallejo wrote: > On 24/05/2024 09:58, Roger Pau Monné wrote: > > On Wed, May 08, 2024 at 01:39:27PM +0100, Alejandro Vallejo wrote: > > > >> + rc = x86_topo_from_parts(&p->policy, threads_per_core, cores_per_pkg); > > > > I assume this generates the same topology as the current code, or will > > the population of the leaves be different in some way? > > > > The current code does not populate 0xb. This generates a topology > consistent with the existing INTENDED topology. The actual APIC IDs will > be different though (because there's no skipping of odd values). > > All the dance in patch 1 was to make this migrate-safe. The x2apic ID is > stored in the lapic hidden regs so differences with previous behaviour > don't matter. What about systems without CPU policy in the migration stream, will those also get restored as expected? I think you likely need to check whether 'restore' is set and keep the old logic in that case? As otherwise migrated systems without a CPU policy will get the new topology information instead of the old one? > IOW, The differences are: > * 0xb is exposed, whereas previously it wasn't > * APIC IDs are compacted such that new_apicid=old_apicid/2 > * There's also a cleanup of the murkier paths to put the right core > counts in the right leaves (whereas previously it was bonkers) This needs to be in the commit message IMO. > > > > Note that currently the host policy also gets the topology leaves > > cleared, is it intended to not clear them anymore after this change? > > > > (as you only clear the leaves for the guest {max,def} policies) > > > > Thanks, Roger. > > It was like that originally in v1, I changed in v2 as part of feedback > from Jan. I think that's fine, but this divergence from current behavior of cleaning the topology for the host policy needs to be mentioned in the commit message. Thanks, Roger.
On 27/05/2024 09:20, Roger Pau Monné wrote: > On Fri, May 24, 2024 at 06:16:01PM +0100, Alejandro Vallejo wrote: >> On 24/05/2024 09:58, Roger Pau Monné wrote: >>> On Wed, May 08, 2024 at 01:39:27PM +0100, Alejandro Vallejo wrote: >>> >>>> + rc = x86_topo_from_parts(&p->policy, threads_per_core, cores_per_pkg); >>> >>> I assume this generates the same topology as the current code, or will >>> the population of the leaves be different in some way? >>> >> >> The current code does not populate 0xb. This generates a topology >> consistent with the existing INTENDED topology. The actual APIC IDs will >> be different though (because there's no skipping of odd values). >> >> All the dance in patch 1 was to make this migrate-safe. The x2apic ID is >> stored in the lapic hidden regs so differences with previous behaviour >> don't matter. > > What about systems without CPU policy in the migration stream, will > those also get restored as expected? > > I think you likely need to check whether 'restore' is set and keep the > old logic in that case? > > As otherwise migrated systems without a CPU policy will get the new > topology information instead of the old one? Bah. I hoped the x2apic ID restoration would mean I could get away with removing all that junk, but migrations from Xen v.StoneAge do cause mayhem. And it'd be very bizarre because the new topology leaves would not reflect the existing x2apic IDs either. I'll condense that blob of nonsense with the old scheme into a separate function so we can easily deprecate it in the future. > >> IOW, The differences are: >> * 0xb is exposed, whereas previously it wasn't >> * APIC IDs are compacted such that new_apicid=old_apicid/2 >> * There's also a cleanup of the murkier paths to put the right core >> counts in the right leaves (whereas previously it was bonkers) > > This needs to be in the commit message IMO. > Sure. >>> >>> Note that currently the host policy also gets the topology leaves >>> cleared, is it intended to not clear them anymore after this change? >>> >>> (as you only clear the leaves for the guest {max,def} policies) >>> >>> Thanks, Roger. >> >> It was like that originally in v1, I changed in v2 as part of feedback >> from Jan. > > I think that's fine, but this divergence from current behavior of > cleaning the topology for the host policy needs to be mentioned in > the commit message. > > Thanks, Roger. Sure. Cheers, Alejandro
diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c index 4453178100ad..8170769dbe43 100644 --- a/tools/libs/guest/xg_cpuid_x86.c +++ b/tools/libs/guest/xg_cpuid_x86.c @@ -584,7 +584,7 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, bool hvm; xc_domaininfo_t di; struct xc_cpu_policy *p = xc_cpu_policy_init(); - unsigned int i, nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; + unsigned int nr_leaves = ARRAY_SIZE(p->leaves), nr_msrs = 0; uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1; uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {}; uint32_t len = ARRAY_SIZE(host_featureset); @@ -727,59 +727,15 @@ int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore, } else { - /* - * Topology for HVM guests is entirely controlled by Xen. For now, we - * hardcode APIC_ID = vcpu_id * 2 to give the illusion of no SMT. - */ - p->policy.basic.htt = true; - p->policy.extd.cmp_legacy = false; - - /* - * Leaf 1 EBX[23:16] is Maximum Logical Processors Per Package. - * Update to reflect vLAPIC_ID = vCPU_ID * 2, but make sure to avoid - * overflow. - */ - if ( !p->policy.basic.lppp ) - p->policy.basic.lppp = 2; - else if ( !(p->policy.basic.lppp & 0x80) ) - p->policy.basic.lppp *= 2; - - switch ( p->policy.x86_vendor ) + /* TODO: Expose the ability to choose a custom topology for HVM/PVH */ + unsigned int threads_per_core = 1; + unsigned int cores_per_pkg = di.max_vcpu_id + 1; + rc = x86_topo_from_parts(&p->policy, threads_per_core, cores_per_pkg); + if ( rc ) { - case X86_VENDOR_INTEL: - for ( i = 0; (p->policy.cache.subleaf[i].type && - i < ARRAY_SIZE(p->policy.cache.raw)); ++i ) - { - p->policy.cache.subleaf[i].cores_per_package = - (p->policy.cache.subleaf[i].cores_per_package << 1) | 1; - p->policy.cache.subleaf[i].threads_per_cache = 0; - } - break; - - case X86_VENDOR_AMD: - case X86_VENDOR_HYGON: - /* - * Leaf 0x80000008 ECX[15:12] is ApicIdCoreSize. - * Leaf 0x80000008 ECX[7:0] is NumberOfCores (minus one). - * Update to reflect vLAPIC_ID = vCPU_ID * 2. But avoid - * - overflow, - * - going out of sync with leaf 1 EBX[23:16], - * - incrementing ApicIdCoreSize when it's zero (which changes the - * meaning of bits 7:0). - * - * UPDATE: I addition to avoiding overflow, some - * proprietary operating systems have trouble with - * apic_id_size values greater than 7. Limit the value to - * 7 for now. - */ - if ( p->policy.extd.nc < 0x7f ) - { - if ( p->policy.extd.apic_id_size != 0 && p->policy.extd.apic_id_size < 0x7 ) - p->policy.extd.apic_id_size++; - - p->policy.extd.nc = (p->policy.extd.nc << 1) | 1; - } - break; + ERROR("Failed to generate topology: t/c=%u c/p=%u", + threads_per_core, cores_per_pkg); + goto out; } } diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c index 4b6d96276399..0ad871732ba0 100644 --- a/xen/arch/x86/cpu-policy.c +++ b/xen/arch/x86/cpu-policy.c @@ -278,9 +278,6 @@ static void recalculate_misc(struct cpu_policy *p) p->basic.raw[0x8] = EMPTY_LEAF; - /* TODO: Rework topology logic. */ - memset(p->topo.raw, 0, sizeof(p->topo.raw)); - p->basic.raw[0xc] = EMPTY_LEAF; p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES; @@ -621,6 +618,9 @@ static void __init calculate_pv_max_policy(void) recalculate_xstate(p); p->extd.raw[0xa] = EMPTY_LEAF; /* No SVM for PV guests. */ + + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ + memset(p->topo.raw, 0, sizeof(p->topo.raw)); } static void __init calculate_pv_def_policy(void) @@ -773,6 +773,9 @@ static void __init calculate_hvm_max_policy(void) /* It's always possible to emulate CPUID faulting for HVM guests */ p->platform_info.cpuid_faulting = true; + + /* Wipe host topology. Toolstack is expected to synthesise a sensible one */ + memset(p->topo.raw, 0, sizeof(p->topo.raw)); } static void __init calculate_hvm_def_policy(void)
Expose sensible topologies in leaf 0xb. At the moment it synthesises non-HT systems, in line with the previous code intent. Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com> --- v2: * Zap the topology leaves of (pv/hvm)_(def/max)_policy rather than the host policy --- tools/libs/guest/xg_cpuid_x86.c | 62 +++++---------------------------- xen/arch/x86/cpu-policy.c | 9 +++-- 2 files changed, 15 insertions(+), 56 deletions(-)