Message ID | 549909b7-e34c-4a5c-aa21-9892a1724042@suse.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [v2] x86: amend 'n' debug-key output with SMI count | expand |
On Wed, Feb 14, 2024 at 11:15:51AM +0100, Jan Beulich wrote: > ... if available only, of course. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > I don't really like issuing an IPI (and having another cf_check > function) here, yet then again this is issued only when the debug key > is actually used, and given how simple the handling function is > (including that it doesn't use its parameter) it also looks difficult > to abuse. > --- > v2: Actually read each CPU's SMI count in do_nmi_stats(). > > --- a/xen/arch/x86/cpu/common.c > +++ b/xen/arch/x86/cpu/common.c > @@ -407,9 +407,15 @@ void __init early_cpu_init(bool verbose) > paddr_bits -= (ebx >> 6) & 0x3f; > } > > - if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) > + if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) { > + uint64_t smi_count; > + > park_offline_cpus = opt_mce; > > + if (!verbose && !rdmsr_safe(MSR_SMI_COUNT, smi_count)) > + setup_force_cpu_cap(X86_FEATURE_SMI_COUNT); Why make it dependent on !verbose? The call with !verbose is tied to part of the ucode loading being half-functional (for example MCU_CONTROL_DIS_MCU_LOAD not being set) but I don't see that as a signal that SMI count shouldn't be used. Does it need to be part of the early CPU initialization instead of being in the (later) Intel-specific init code part of identify_cpu()? > + } > + > initialize_cpu_data(0); > } > > --- a/xen/arch/x86/include/asm/cpufeatures.h > +++ b/xen/arch/x86/include/asm/cpufeatures.h > @@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SY > XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */ > XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */ > XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */ > -/* Bit 12 unused. 
*/ > +XEN_CPUFEATURE(SMI_COUNT, X86_SYNTH(12)) /* MSR_SMI_COUNT exists */ > XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */ > XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */ > XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */ > --- a/xen/arch/x86/include/asm/msr-index.h > +++ b/xen/arch/x86/include/asm/msr-index.h > @@ -28,6 +28,8 @@ > #define TEST_CTRL_SPLITLOCK_DETECT (_AC(1, ULL) << 29) > #define TEST_CTRL_SPLITLOCK_DISABLE (_AC(1, ULL) << 31) > > +#define MSR_SMI_COUNT 0x00000034 > + > #define MSR_INTEL_CORE_THREAD_COUNT 0x00000035 > #define MSR_CTC_THREAD_MASK 0x0000ffff > #define MSR_CTC_CORE_MASK _AC(0xffff0000, U) > --- a/xen/arch/x86/nmi.c > +++ b/xen/arch/x86/nmi.c > @@ -585,15 +585,34 @@ static void cf_check do_nmi_trigger(unsi > self_nmi(); > } > > +static DEFINE_PER_CPU(unsigned int, smi_count); > + > +static void cf_check read_smi_count(void *unused) > +{ > + unsigned int dummy; > + > + rdmsr(MSR_SMI_COUNT, this_cpu(smi_count), dummy); > +} > + > static void cf_check do_nmi_stats(unsigned char key) > { > const struct vcpu *v; > unsigned int cpu; > bool pend, mask; > > - printk("CPU\tNMI\n"); > + printk("CPU\tNMI%s\n", boot_cpu_has(X86_FEATURE_SMI_COUNT) ? "\tSMI" : ""); > + > + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) > + on_each_cpu(read_smi_count, NULL, 1); > + > for_each_online_cpu ( cpu ) > - printk("%3u\t%3u\n", cpu, per_cpu(nmi_count, cpu)); > + { > + printk("%3u\t%3u", cpu, per_cpu(nmi_count, cpu)); > + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) > + printk("\t%3u\n", per_cpu(smi_count, cpu)); > + else > + printk("\n"); > + } > > if ( !hardware_domain || !(v = domain_vcpu(hardware_domain, 0)) ) > return; Could you also amend the debug-key help text to mention SMI? Thanks, Roger.
On 16.02.2024 10:11, Roger Pau Monné wrote: > On Wed, Feb 14, 2024 at 11:15:51AM +0100, Jan Beulich wrote: >> --- a/xen/arch/x86/cpu/common.c >> +++ b/xen/arch/x86/cpu/common.c >> @@ -407,9 +407,15 @@ void __init early_cpu_init(bool verbose) >> paddr_bits -= (ebx >> 6) & 0x3f; >> } >> >> - if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) >> + if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) { >> + uint64_t smi_count; >> + >> park_offline_cpus = opt_mce; >> >> + if (!verbose && !rdmsr_safe(MSR_SMI_COUNT, smi_count)) >> + setup_force_cpu_cap(X86_FEATURE_SMI_COUNT); > > Why make it dependent on !verbose? The call with !verbose is tied to > part of the ucode loading being half-functional (for example > MCU_CONTROL_DIS_MCU_LOAD not being set) but I don't see that as a > signal that SMI count shouldn't be used. > > does it need to be part of the early cpu initialization instead of > being in the (later) Intel specific init code part of the > identify_cpu()? Yes, the condition was inverted. It could likely also be dropped altogether; I'm not sure which one's better: on the one hand, avoiding multiple setup_force_cpu_cap() calls seems desirable (albeit not strictly necessary), while on the other hand the code would be simpler without it. >> --- a/xen/arch/x86/nmi.c >> +++ b/xen/arch/x86/nmi.c >> @@ -585,15 +585,34 @@ static void cf_check do_nmi_trigger(unsi >> self_nmi(); >> } >> >> +static DEFINE_PER_CPU(unsigned int, smi_count); >> + >> +static void cf_check read_smi_count(void *unused) >> +{ >> + unsigned int dummy; >> + >> + rdmsr(MSR_SMI_COUNT, this_cpu(smi_count), dummy); >> +} >> + >> static void cf_check do_nmi_stats(unsigned char key) >> { >> const struct vcpu *v; >> unsigned int cpu; >> bool pend, mask; >> >> - printk("CPU\tNMI\n"); >> + printk("CPU\tNMI%s\n", boot_cpu_has(X86_FEATURE_SMI_COUNT) ? 
"\tSMI" : ""); >> + >> + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) >> + on_each_cpu(read_smi_count, NULL, 1); >> + >> for_each_online_cpu ( cpu ) >> - printk("%3u\t%3u\n", cpu, per_cpu(nmi_count, cpu)); >> + { >> + printk("%3u\t%3u", cpu, per_cpu(nmi_count, cpu)); >> + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) >> + printk("\t%3u\n", per_cpu(smi_count, cpu)); >> + else >> + printk("\n"); >> + } >> >> if ( !hardware_domain || !(v = domain_vcpu(hardware_domain, 0)) ) >> return; > > Could you also amend the debug-key help text to mention SMI? Hmm, I had considered that and decided against it. I'm uncertain, nevertheless, so I could be talked into amending that help text. It's just that I can't make it "NMI and SMI statistics", as whether SMI data is available is conditional. Yet "NMI (and maybe SMI) statistics" looks a little clumsy to me ... Jan
--- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -407,9 +407,15 @@ void __init early_cpu_init(bool verbose) paddr_bits -= (ebx >> 6) & 0x3f; } - if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) + if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON))) { + uint64_t smi_count; + park_offline_cpus = opt_mce; + if (!verbose && !rdmsr_safe(MSR_SMI_COUNT, smi_count)) + setup_force_cpu_cap(X86_FEATURE_SMI_COUNT); + } + initialize_cpu_data(0); } --- a/xen/arch/x86/include/asm/cpufeatures.h +++ b/xen/arch/x86/include/asm/cpufeatures.h @@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SY XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */ XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */ XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */ -/* Bit 12 unused. */ +XEN_CPUFEATURE(SMI_COUNT, X86_SYNTH(12)) /* MSR_SMI_COUNT exists */ XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */ XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */ XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */ --- a/xen/arch/x86/include/asm/msr-index.h +++ b/xen/arch/x86/include/asm/msr-index.h @@ -28,6 +28,8 @@ #define TEST_CTRL_SPLITLOCK_DETECT (_AC(1, ULL) << 29) #define TEST_CTRL_SPLITLOCK_DISABLE (_AC(1, ULL) << 31) +#define MSR_SMI_COUNT 0x00000034 + #define MSR_INTEL_CORE_THREAD_COUNT 0x00000035 #define MSR_CTC_THREAD_MASK 0x0000ffff #define MSR_CTC_CORE_MASK _AC(0xffff0000, U) --- a/xen/arch/x86/nmi.c +++ b/xen/arch/x86/nmi.c @@ -585,15 +585,34 @@ static void cf_check do_nmi_trigger(unsi self_nmi(); } +static DEFINE_PER_CPU(unsigned int, smi_count); + +static void cf_check read_smi_count(void *unused) +{ + unsigned int dummy; + + rdmsr(MSR_SMI_COUNT, this_cpu(smi_count), dummy); +} + static void cf_check do_nmi_stats(unsigned char key) { const struct vcpu *v; unsigned int cpu; bool pend, mask; - 
printk("CPU\tNMI\n"); + printk("CPU\tNMI%s\n", boot_cpu_has(X86_FEATURE_SMI_COUNT) ? "\tSMI" : ""); + + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) + on_each_cpu(read_smi_count, NULL, 1); + for_each_online_cpu ( cpu ) - printk("%3u\t%3u\n", cpu, per_cpu(nmi_count, cpu)); + { + printk("%3u\t%3u", cpu, per_cpu(nmi_count, cpu)); + if ( boot_cpu_has(X86_FEATURE_SMI_COUNT) ) + printk("\t%3u\n", per_cpu(smi_count, cpu)); + else + printk("\n"); + } if ( !hardware_domain || !(v = domain_vcpu(hardware_domain, 0)) ) return;
... if available only, of course. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- I don't really like issuing an IPI (and having another cf_check function) here, yet then again this is issued only when the debug key is actually used, and given how simple the handling function is (including that it doesn't use its parameter) it also looks difficult to abuse. --- v2: Actually read each CPU's SMI count in do_nmi_stats().