Message ID | 82277592-ea96-47c8-a991-7afd97d7a7bc@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [for,4.19?] x86/Intel: unlock CPUID earlier for the BSP | expand |
On Thu, 2024-06-13 at 10:19 +0200, Jan Beulich wrote: > Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If > this bit is set by the BIOS then CPUID evaluation does not work when > data from any leaf greater than two is needed; early_cpu_init() in > particular wants to collect leaf 7 data. > > Cure this by unlocking CPUID right before evaluating anything which > depends on the maximum CPUID leaf being greater than two. > > Inspired by (and description cloned from) Linux commit 0c2f6d04619e > ("x86/topology/intel: Unlock CPUID before evaluating anything"). > > Signed-off-by: Jan Beulich <jbeulich@suse.com> Release-Acked-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> ~ Oleksii > --- > While I couldn't spot anything, it kind of feels as if I'm > overlooking > further places where we might be inspecting in particular leaf 7 yet > earlier. > > No Fixes: tag(s), as imo it would be too many that would want > enumerating. > > --- a/xen/arch/x86/cpu/common.c > +++ b/xen/arch/x86/cpu/common.c > @@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose) > > c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); > switch (c->x86_vendor) { > - case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; > break; > + case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c); > + actual_cpu = intel_cpu_dev; > break; > case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; > break; > case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; > break; > case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; > break; > --- a/xen/arch/x86/cpu/cpu.h > +++ b/xen/arch/x86/cpu/cpu.h > @@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 > void amd_init_ssbd(const struct cpuinfo_x86 *c); > void amd_init_spectral_chicken(void); > void detect_zen2_null_seg_behaviour(void); > + > +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c); > --- a/xen/arch/x86/cpu/intel.c > +++ b/xen/arch/x86/cpu/intel.c > @@ -303,10 +303,24 @@ static void __init noinline intel_init_l > ctxt_switch_masking = intel_ctxt_switch_masking; > } > > -static void cf_check early_init_intel(struct cpuinfo_x86 *c) > +/* Unmask CPUID levels if masked. */ > +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c) > { > - u64 misc_enable, disable; > + uint64_t misc_enable, disable; > + > + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); > + > + disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; > + if (disable) { > + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & > ~disable); > + bootsym(trampoline_misc_enable_off) |= disable; > + c->cpuid_level = cpuid_eax(0); > + printk(KERN_INFO "revised cpuid level: %u\n", c- > >cpuid_level); > + } > +} > > +static void cf_check early_init_intel(struct cpuinfo_x86 *c) > +{ > /* Netburst reports 64 bytes clflush size, but does IO in > 128 bytes */ > if (c->x86 == 15 && c->x86_cache_alignment == 64) > c->x86_cache_alignment = 128; > @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st > bootsym(trampoline_misc_enable_off) & > MSR_IA32_MISC_ENABLE_XD_DISABLE) > printk(KERN_INFO "re-enabled NX (Execute Disable) > protection\n"); > > - /* Unmask CPUID levels and NX if masked: */ > - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); > - > - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; > - if (disable) { > - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & > ~disable); > - bootsym(trampoline_misc_enable_off) |= disable; > - printk(KERN_INFO "revised cpuid level: %d\n", > - cpuid_eax(0)); > - } > + intel_unlock_cpuid_leaves(c); > > /* CPUID workaround for Intel 0F33/0F34 CPU */ > if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 > &&
On Thu, Jun 13, 2024 at 10:19:30AM +0200, Jan Beulich wrote: > Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If > this bit is set by the BIOS then CPUID evaluation does not work when > data from any leaf greater than two is needed; early_cpu_init() in > particular wants to collect leaf 7 data. > > Cure this by unlocking CPUID right before evaluating anything which > depends on the maximum CPUID leaf being greater than two. > > Inspired by (and description cloned from) Linux commit 0c2f6d04619e > ("x86/topology/intel: Unlock CPUID before evaluating anything"). > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > While I couldn't spot anything, it kind of feels as if I'm overlooking > further places where we might be inspecting in particular leaf 7 yet > earlier. > > No Fixes: tag(s), as imo it would be too many that would want > enumerating. > > --- a/xen/arch/x86/cpu/common.c > +++ b/xen/arch/x86/cpu/common.c > @@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose) > > c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); > switch (c->x86_vendor) { > - case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; > + case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c); > + actual_cpu = intel_cpu_dev; break; > case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; > case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; > case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; > --- a/xen/arch/x86/cpu/cpu.h > +++ b/xen/arch/x86/cpu/cpu.h > @@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 > void amd_init_ssbd(const struct cpuinfo_x86 *c); > void amd_init_spectral_chicken(void); > void detect_zen2_null_seg_behaviour(void); > + > +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c); > --- a/xen/arch/x86/cpu/intel.c > +++ b/xen/arch/x86/cpu/intel.c > @@ -303,10 +303,24 @@ static void __init noinline intel_init_l > ctxt_switch_masking = intel_ctxt_switch_masking; > } > > -static void cf_check early_init_intel(struct cpuinfo_x86 *c) > +/* Unmask CPUID levels if masked. */ > +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c) > { > - u64 misc_enable, disable; > + uint64_t misc_enable, disable; > + > + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); > + > + disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; > + if (disable) { > + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); > + bootsym(trampoline_misc_enable_off) |= disable; > + c->cpuid_level = cpuid_eax(0); > + printk(KERN_INFO "revised cpuid level: %u\n", c->cpuid_level); > + } > +} > > +static void cf_check early_init_intel(struct cpuinfo_x86 *c) > +{ > /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ > if (c->x86 == 15 && c->x86_cache_alignment == 64) > c->x86_cache_alignment = 128; > @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st > bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) > printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); > > - /* Unmask CPUID levels and NX if masked: */ > - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); > - > - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; > - if (disable) { > - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); > - bootsym(trampoline_misc_enable_off) |= disable; > - printk(KERN_INFO "revised cpuid level: %d\n", > - cpuid_eax(0)); > - } > + intel_unlock_cpuid_leaves(c); Do you really need to call intel_unlock_cpuid_leaves() here? For the BSP it will be taken care in early_cpu_init(), and for the APs MSR_IA32_MISC_ENABLE it will be set as part of the trampoline with the disables from trampoline_misc_enable_off. Thanks, Roger.
On 13.06.2024 17:16, Roger Pau Monné wrote: > On Thu, Jun 13, 2024 at 10:19:30AM +0200, Jan Beulich wrote: >> @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st >> bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) >> printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); >> >> - /* Unmask CPUID levels and NX if masked: */ >> - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); >> - >> - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; >> - if (disable) { >> - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); >> - bootsym(trampoline_misc_enable_off) |= disable; >> - printk(KERN_INFO "revised cpuid level: %d\n", >> - cpuid_eax(0)); >> - } >> + intel_unlock_cpuid_leaves(c); > > Do you really need to call intel_unlock_cpuid_leaves() here? > > For the BSP it will be taken care in early_cpu_init(), and for the APs > MSR_IA32_MISC_ENABLE it will be set as part of the trampoline with the > disables from trampoline_misc_enable_off. The way the original code works, it allows to deal with the BSP having the bit clear, but some or all of the APs having it set. I simply didn't want to break that property. Jan
On Thu, Jun 13, 2024 at 05:53:02PM +0200, Jan Beulich wrote: > On 13.06.2024 17:16, Roger Pau Monné wrote: > > On Thu, Jun 13, 2024 at 10:19:30AM +0200, Jan Beulich wrote: > >> @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st > >> bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) > >> printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); > >> > >> - /* Unmask CPUID levels and NX if masked: */ > >> - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); > >> - > >> - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; > >> - if (disable) { > >> - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); > >> - bootsym(trampoline_misc_enable_off) |= disable; > >> - printk(KERN_INFO "revised cpuid level: %d\n", > >> - cpuid_eax(0)); > >> - } > >> + intel_unlock_cpuid_leaves(c); > > > > Do you really need to call intel_unlock_cpuid_leaves() here? > > > > For the BSP it will be taken care in early_cpu_init(), and for the APs > > MSR_IA32_MISC_ENABLE it will be set as part of the trampoline with the > > disables from trampoline_misc_enable_off. > > The way the original code works, it allows to deal with the BSP having the > bit clear, but some or all of the APs having it set. I simply didn't want > to break that property. Oh, I see. This looks like something we would unconditionally like to set in trampoline_misc_enable_off? Except that would trigger an unnecessary write to the MSR if the CPU already has it disabled. Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> I think the printk could be made a printk_once, since it doesn't even print the CPU where the bit has been seen as set, but anyway, that would be further adjustments. Thanks, Roger.
On 13.06.2024 18:04, Roger Pau Monné wrote: > On Thu, Jun 13, 2024 at 05:53:02PM +0200, Jan Beulich wrote: >> On 13.06.2024 17:16, Roger Pau Monné wrote: >>> On Thu, Jun 13, 2024 at 10:19:30AM +0200, Jan Beulich wrote: >>>> @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st >>>> bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) >>>> printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); >>>> >>>> - /* Unmask CPUID levels and NX if masked: */ >>>> - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); >>>> - >>>> - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; >>>> - if (disable) { >>>> - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); >>>> - bootsym(trampoline_misc_enable_off) |= disable; >>>> - printk(KERN_INFO "revised cpuid level: %d\n", >>>> - cpuid_eax(0)); >>>> - } >>>> + intel_unlock_cpuid_leaves(c); >>> >>> Do you really need to call intel_unlock_cpuid_leaves() here? >>> >>> For the BSP it will be taken care in early_cpu_init(), and for the APs >>> MSR_IA32_MISC_ENABLE it will be set as part of the trampoline with the >>> disables from trampoline_misc_enable_off. >> >> The way the original code works, it allows to deal with the BSP having the >> bit clear, but some or all of the APs having it set. I simply didn't want >> to break that property. > > Oh, I see. This looks like something we would unconditionally like to > set in trampoline_misc_enable_off? Except that would trigger an > unnecessary write to the MSR if the CPU already has it disabled. Might be an option; the extra MSR access may not be _that_ harmful. > Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> Thanks. > I think the printk could be made a printk_once, since it doesn't even > print the CPU where the bit has been seen as set, but anyway, that > would be further adjustments. Well, yes and no. Having it the way it is now and seeing the message twice in a log would be indicative of a problem. Jan
On 13/06/2024 9:19 am, Jan Beulich wrote: > Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If > this bit is set by the BIOS then CPUID evaluation does not work when > data from any leaf greater than two is needed; early_cpu_init() in > particular wants to collect leaf 7 data. > > Cure this by unlocking CPUID right before evaluating anything which > depends on the maximum CPUID leaf being greater than two. > > Inspired by (and description cloned from) Linux commit 0c2f6d04619e > ("x86/topology/intel: Unlock CPUID before evaluating anything"). > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > While I couldn't spot anything, it kind of feels as if I'm overlooking > further places where we might be inspecting in particular leaf 7 yet > earlier. > > No Fixes: tag(s), as imo it would be too many that would want > enumerating. I also saw that go by, but concluded that Xen doesn't need it, hence why I left it alone. The truth is that only the BSP needs it. APs sort it out in the trampoline via trampoline_misc_enable_off, because they need to clear XD_DISABLE in prior to enabling paging, so we should be taking it out of early_init_intel(). But, we don't have an early BSP-only early hook, and I'm not overwhelmed at the idea of exporting it from intel.c I was intending to leave it alone until I can burn this whole infrastructure to the ground and make it work nicely with policies, but that's not a job for this point in the release... ~Andrew
On 13.06.2024 18:17, Andrew Cooper wrote: > On 13/06/2024 9:19 am, Jan Beulich wrote: >> Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If >> this bit is set by the BIOS then CPUID evaluation does not work when >> data from any leaf greater than two is needed; early_cpu_init() in >> particular wants to collect leaf 7 data. >> >> Cure this by unlocking CPUID right before evaluating anything which >> depends on the maximum CPUID leaf being greater than two. >> >> Inspired by (and description cloned from) Linux commit 0c2f6d04619e >> ("x86/topology/intel: Unlock CPUID before evaluating anything"). >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> >> --- >> While I couldn't spot anything, it kind of feels as if I'm overlooking >> further places where we might be inspecting in particular leaf 7 yet >> earlier. >> >> No Fixes: tag(s), as imo it would be too many that would want >> enumerating. > > I also saw that go by, but concluded that Xen doesn't need it, hence why > I left it alone. > > The truth is that only the BSP needs it. APs sort it out in the > trampoline via trampoline_misc_enable_off, because they need to clear > XD_DISABLE in prior to enabling paging, so we should be taking it out of > early_init_intel(). Except for the (odd) case also mentioned to Roger, where the BSP might have the bit clear but some (or all) AP(s) have it set. > But, we don't have an early BSP-only early hook, and I'm not overwhelmed > at the idea of exporting it from intel.c > > I was intending to leave it alone until I can burn this whole > infrastructure to the ground and make it work nicely with policies, but > that's not a job for this point in the release... This last part reads like the rest of your reply isn't an objection to me putting this in with Roger's R-b, but it would be nice if you could confirm this understanding of mine. Without this last part it, especially the 2nd from last paragraph, certainly reads a little like an objection. Jan
On 14/06/2024 7:27 am, Jan Beulich wrote: > On 13.06.2024 18:17, Andrew Cooper wrote: >> On 13/06/2024 9:19 am, Jan Beulich wrote: >>> Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If >>> this bit is set by the BIOS then CPUID evaluation does not work when >>> data from any leaf greater than two is needed; early_cpu_init() in >>> particular wants to collect leaf 7 data. >>> >>> Cure this by unlocking CPUID right before evaluating anything which >>> depends on the maximum CPUID leaf being greater than two. >>> >>> Inspired by (and description cloned from) Linux commit 0c2f6d04619e >>> ("x86/topology/intel: Unlock CPUID before evaluating anything"). >>> >>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >>> --- >>> While I couldn't spot anything, it kind of feels as if I'm overlooking >>> further places where we might be inspecting in particular leaf 7 yet >>> earlier. >>> >>> No Fixes: tag(s), as imo it would be too many that would want >>> enumerating. >> I also saw that go by, but concluded that Xen doesn't need it, hence why >> I left it alone. >> >> The truth is that only the BSP needs it. APs sort it out in the >> trampoline via trampoline_misc_enable_off, because they need to clear >> XD_DISABLE in prior to enabling paging, so we should be taking it out of >> early_init_intel(). > Except for the (odd) case also mentioned to Roger, where the BSP might have > the bit clear but some (or all) AP(s) have it set. Fine I suppose. It's a single MSR adjustment once per CPU. > >> But, we don't have an early BSP-only early hook, and I'm not overwhelmed >> at the idea of exporting it from intel.c >> >> I was intending to leave it alone until I can burn this whole >> infrastructure to the ground and make it work nicely with policies, but >> that's not a job for this point in the release... > This last part reads like the rest of your reply isn't an objection to me > putting this in with Roger's R-b, but it would be nice if you could > confirm this understanding of mine. Without this last part it, especially > the 2nd from last paragraph, certainly reads a little like an objection. I'm -1 to this generally. It's churn without fixing anything AFAICT, and is moving in a direction which will need undoing in the future. But, because it doesn't fix anything, I don't think it's appropriate to be tagged as 4.19 even if you and roger feel strongly enough to put it into 4.20. ~Andrew
On 14.06.2024 12:14, Andrew Cooper wrote: > On 14/06/2024 7:27 am, Jan Beulich wrote: >> On 13.06.2024 18:17, Andrew Cooper wrote: >>> On 13/06/2024 9:19 am, Jan Beulich wrote: >>>> Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If >>>> this bit is set by the BIOS then CPUID evaluation does not work when >>>> data from any leaf greater than two is needed; early_cpu_init() in >>>> particular wants to collect leaf 7 data. >>>> >>>> Cure this by unlocking CPUID right before evaluating anything which >>>> depends on the maximum CPUID leaf being greater than two. >>>> >>>> Inspired by (and description cloned from) Linux commit 0c2f6d04619e >>>> ("x86/topology/intel: Unlock CPUID before evaluating anything"). >>>> >>>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >>>> --- >>>> While I couldn't spot anything, it kind of feels as if I'm overlooking >>>> further places where we might be inspecting in particular leaf 7 yet >>>> earlier. >>>> >>>> No Fixes: tag(s), as imo it would be too many that would want >>>> enumerating. >>> I also saw that go by, but concluded that Xen doesn't need it, hence why >>> I left it alone. >>> >>> The truth is that only the BSP needs it. APs sort it out in the >>> trampoline via trampoline_misc_enable_off, because they need to clear >>> XD_DISABLE in prior to enabling paging, so we should be taking it out of >>> early_init_intel(). >> Except for the (odd) case also mentioned to Roger, where the BSP might have >> the bit clear but some (or all) AP(s) have it set. > > Fine I suppose. It's a single MSR adjustment once per CPU. > >> >>> But, we don't have an early BSP-only early hook, and I'm not overwhelmed >>> at the idea of exporting it from intel.c >>> >>> I was intending to leave it alone until I can burn this whole >>> infrastructure to the ground and make it work nicely with policies, but >>> that's not a job for this point in the release... >> This last part reads like the rest of your reply isn't an objection to me >> putting this in with Roger's R-b, but it would be nice if you could >> confirm this understanding of mine. Without this last part it, especially >> the 2nd from last paragraph, certainly reads a little like an objection. > > I'm -1 to this generally. It's churn without fixing anything AFAICT, How that? We clearly do the adjustment too late right now for the BSP. All the leaf-7 stuff added to early_cpu_init() (iirc in part in the course of speculation work) is useless on a system where firmware set that flag. Jan > and is moving in a direction which will need undoing in the future. > > But, because it doesn't fix anything, I don't think it's appropriate to > be tagged as 4.19 even if you and roger feel strongly enough to put it > into 4.20. > > ~Andrew
On 14/06/2024 12:12 pm, Jan Beulich wrote: > On 14.06.2024 12:14, Andrew Cooper wrote: >> On 14/06/2024 7:27 am, Jan Beulich wrote: >>> On 13.06.2024 18:17, Andrew Cooper wrote: >>>> On 13/06/2024 9:19 am, Jan Beulich wrote: >>>>> Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If >>>>> this bit is set by the BIOS then CPUID evaluation does not work when >>>>> data from any leaf greater than two is needed; early_cpu_init() in >>>>> particular wants to collect leaf 7 data. >>>>> >>>>> Cure this by unlocking CPUID right before evaluating anything which >>>>> depends on the maximum CPUID leaf being greater than two. >>>>> >>>>> Inspired by (and description cloned from) Linux commit 0c2f6d04619e >>>>> ("x86/topology/intel: Unlock CPUID before evaluating anything"). >>>>> >>>>> Signed-off-by: Jan Beulich <jbeulich@suse.com> >>>>> --- >>>>> While I couldn't spot anything, it kind of feels as if I'm overlooking >>>>> further places where we might be inspecting in particular leaf 7 yet >>>>> earlier. >>>>> >>>>> No Fixes: tag(s), as imo it would be too many that would want >>>>> enumerating. >>>> I also saw that go by, but concluded that Xen doesn't need it, hence why >>>> I left it alone. >>>> >>>> The truth is that only the BSP needs it. APs sort it out in the >>>> trampoline via trampoline_misc_enable_off, because they need to clear >>>> XD_DISABLE in prior to enabling paging, so we should be taking it out of >>>> early_init_intel(). >>> Except for the (odd) case also mentioned to Roger, where the BSP might have >>> the bit clear but some (or all) AP(s) have it set. >> Fine I suppose. It's a single MSR adjustment once per CPU. >> >>>> But, we don't have an early BSP-only early hook, and I'm not overwhelmed >>>> at the idea of exporting it from intel.c >>>> >>>> I was intending to leave it alone until I can burn this whole >>>> infrastructure to the ground and make it work nicely with policies, but >>>> that's not a job for this point in the release... >>> This last part reads like the rest of your reply isn't an objection to me >>> putting this in with Roger's R-b, but it would be nice if you could >>> confirm this understanding of mine. Without this last part it, especially >>> the 2nd from last paragraph, certainly reads a little like an objection. >> I'm -1 to this generally. It's churn without fixing anything AFAICT, > How that? We clearly do the adjustment too late right now for the BSP. > All the leaf-7 stuff added to early_cpu_init() (iirc in part in the course > of speculation work) is useless on a system where firmware set that flag. After discussing this at the x86 maintainers meeting, I agree that it is fixing a potential bug. So, while I really dislike the patch, it's the right approach in the short term, and should go in. ~Andrew
--- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose) c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); switch (c->x86_vendor) { - case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; + case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c); + actual_cpu = intel_cpu_dev; break; case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; --- a/xen/arch/x86/cpu/cpu.h +++ b/xen/arch/x86/cpu/cpu.h @@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 void amd_init_ssbd(const struct cpuinfo_x86 *c); void amd_init_spectral_chicken(void); void detect_zen2_null_seg_behaviour(void); + +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c); --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -303,10 +303,24 @@ static void __init noinline intel_init_l ctxt_switch_masking = intel_ctxt_switch_masking; } -static void cf_check early_init_intel(struct cpuinfo_x86 *c) +/* Unmask CPUID levels if masked. */ +void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c) { - u64 misc_enable, disable; + uint64_t misc_enable, disable; + + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + + disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; + if (disable) { + wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); + bootsym(trampoline_misc_enable_off) |= disable; + c->cpuid_level = cpuid_eax(0); + printk(KERN_INFO "revised cpuid level: %u\n", c->cpuid_level); + } +} +static void cf_check early_init_intel(struct cpuinfo_x86 *c) +{ /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; @@ -315,16 +329,7 @@ static void cf_check early_init_intel(st bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); - /* Unmask CPUID levels and NX if masked: */ - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - - disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; - if (disable) { - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); - bootsym(trampoline_misc_enable_off) |= disable; - printk(KERN_INFO "revised cpuid level: %d\n", - cpuid_eax(0)); - } + intel_unlock_cpuid_leaves(c); /* CPUID workaround for Intel 0F33/0F34 CPU */ if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If this bit is set by the BIOS then CPUID evaluation does not work when data from any leaf greater than two is needed; early_cpu_init() in particular wants to collect leaf 7 data. Cure this by unlocking CPUID right before evaluating anything which depends on the maximum CPUID leaf being greater than two. Inspired by (and description cloned from) Linux commit 0c2f6d04619e ("x86/topology/intel: Unlock CPUID before evaluating anything"). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- While I couldn't spot anything, it kind of feels as if I'm overlooking further places where we might be inspecting in particular leaf 7 yet earlier. No Fixes: tag(s), as imo it would be too many that would want enumerating.