@@ -35,6 +35,7 @@ bool __read_mostly mce_broadcast;
bool is_mc_panic;
DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_mce_banks);
unsigned int __read_mostly firstbank;
+unsigned int __read_mostly ppin_msr;
uint8_t __read_mostly cmci_apic_vector;
DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, poll_bankmask);
@@ -999,10 +1000,17 @@ static void do_mc_get_cpu_info(void *v)
/*
* This part needs to run on the CPU itself.
*/
- xcp->mc_nmsrvals = __MC_NMSRS;
+ xcp->mc_nmsrvals = 1;
xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
+ if ( ppin_msr && xcp->mc_nmsrvals < ARRAY_SIZE(xcp->mc_msrvalues) )
+ {
+ xcp->mc_msrvalues[xcp->mc_nmsrvals].reg = ppin_msr;
+ rdmsrl(ppin_msr, xcp->mc_msrvalues[xcp->mc_nmsrvals].value);
+ ++xcp->mc_nmsrvals;
+ }
+
if ( c->cpuid_level >= 1 )
{
cpuid(1, &junk, &ebx, &junk, &junk);
@@ -49,6 +49,7 @@ enum mcheck_type intel_mcheck_init(struc
void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
extern unsigned int firstbank;
+extern unsigned int ppin_msr;
struct mcinfo_extended *intel_get_extended_msrs(
struct mcinfo_global *mig, struct mc_info *mi);
@@ -315,6 +315,26 @@ amd_mcheck_init(struct cpuinfo_x86 *ci)
if ( quirkflag == MCEQUIRK_F10_GART )
mcequirk_amd_apply(quirkflag);
+ if ( cpu_has(ci, X86_FEATURE_AMD_PPIN) &&
+ (ci == &boot_cpu_data || ppin_msr) )
+ {
+ uint64_t val;
+
+ rdmsrl(MSR_AMD_PPIN_CTL, val);
+
+ /* If PPIN is disabled, but not locked, try to enable. */
+ if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
+ {
+ wrmsr_safe(MSR_AMD_PPIN_CTL, val | PPIN_ENABLE);
+ rdmsrl(MSR_AMD_PPIN_CTL, val);
+ }
+
+ if ( (val & (PPIN_ENABLE | PPIN_LOCKOUT)) != PPIN_ENABLE )
+ ppin_msr = 0;
+ else if ( ci == &boot_cpu_data )
+ ppin_msr = MSR_AMD_PPIN;
+ }
+
x86_mce_callback_register(amd_f10_handler);
mce_recoverable_register(mc_amd_recoverable_scan);
mce_register_addrcheck(mc_amd_addrcheck);
@@ -853,6 +853,43 @@ static void intel_init_mce(void)
mce_uhandler_num = ARRAY_SIZE(intel_mce_uhandlers);
}
+static void intel_init_ppin(const struct cpuinfo_x86 *c)
+{
+ /*
+ * Even if testing the presence of the MSR would be enough, we don't
+ * want to risk the situation where other models reuse this MSR for
+ * other purposes.
+ */
+ switch ( c->x86_model )
+ {
+ uint64_t val;
+
+ case 0x3e: /* IvyBridge X */
+ case 0x3f: /* Haswell X */
+ case 0x4f: /* Broadwell X */
+ case 0x55: /* Skylake X */
+ case 0x56: /* Broadwell Xeon D */
+ case 0x57: /* Knights Landing */
+ case 0x85: /* Knights Mill */
+
+ if ( (c != &boot_cpu_data && !ppin_msr) ||
+ rdmsr_safe(MSR_PPIN_CTL, val) )
+ return;
+
+ /* If PPIN is disabled, but not locked, try to enable. */
+ if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
+ {
+ wrmsr_safe(MSR_PPIN_CTL, val | PPIN_ENABLE);
+ rdmsr_safe(MSR_PPIN_CTL, val);
+ }
+
+ if ( (val & (PPIN_ENABLE | PPIN_LOCKOUT)) != PPIN_ENABLE )
+ ppin_msr = 0;
+ else if ( c == &boot_cpu_data )
+ ppin_msr = MSR_PPIN;
+ }
+}
+
static void cpu_mcabank_free(unsigned int cpu)
{
struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
@@ -941,6 +978,8 @@ enum mcheck_type intel_mcheck_init(struc
intel_init_thermal(c);
+ intel_init_ppin(c);
+
return mcheck_intel;
}
@@ -45,6 +45,13 @@
#define MSR_PRED_CMD 0x00000049
#define PRED_CMD_IBPB (_AC(1, ULL) << 0)
+/* Intel Protected Processor Inventory Number */
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
+#define PPIN_LOCKOUT (_AC(1, ULL) << 0)
+#define PPIN_ENABLE (_AC(1, ULL) << 1)
+
#define MSR_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAPS_RDCL_NO (_AC(1, ULL) << 0)
#define ARCH_CAPS_IBRS_ALL (_AC(1, ULL) << 1)
@@ -278,6 +285,10 @@
#define MSR_AMD_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD_OSVW_STATUS 0xc0010141
+/* AMD Protected Processor Inventory Number */
+#define MSR_AMD_PPIN_CTL 0xc00102f0
+#define MSR_AMD_PPIN 0xc00102f1
+
/* K6 MSRs */
#define MSR_K6_EFER 0xc0000080
#define MSR_K6_STAR 0xc0000081
@@ -247,6 +247,7 @@ XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2
XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A (F)X{SAVE,RSTOR} always saves/restores FPU Error pointers */
XEN_CPUFEATURE(WBNOINVD, 8*32+ 9) /* WBNOINVD instruction */
XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by AMD) */
+XEN_CPUFEATURE(AMD_PPIN, 8*32+23) /* Protected Processor Inventory Number */
/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
@@ -246,7 +246,9 @@ typedef struct mc_info mc_info_t;
DEFINE_XEN_GUEST_HANDLE(mc_info_t);
#define __MC_MSR_ARRAYSIZE 8
+#if __XEN_INTERFACE_VERSION__ <= 0x00040d00
#define __MC_NMSRS 1
+#endif
#define MC_NCAPS 7 /* 7 CPU feature flag words */
#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */
#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */
@@ -27,7 +27,7 @@
#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
#define __XEN_PUBLIC_XEN_COMPAT_H__
-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040d00
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040e00
#if defined(__XEN__) || defined(__XEN_TOOLS__)
/* Xen is built with matching headers and implements the latest interface. */
Quoting the respective Linux commit:

    Intel Xeons from Ivy Bridge onwards support a processor
    identification number set in the factory. To the user this is a
    handy unique number to identify a particular CPU. Intel can decode
    this to the fab/production run to track errors. On systems that
    have it, include it in the machine check record.

    I'm told that this would be helpful for users that run large data
    centers with multi-socket servers to keep track of which CPUs are
    seeing errors.

Newer AMD CPUs support this too, at different MSR numbers.

Take the opportunity and hide __MC_NMSRS from the public interface
going forward.

[Linux commit 3f5a7896a5096fd50030a04d4c3f28a7441e30a5]

Signed-off-by: Jan Beulich <jbeulich@suse.com>
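---

For illustration only, a minimal consumer-side sketch of how a management tool
might pick the PPIN back out of the per-CPU MSR array that do_mc_get_cpu_info()
populates above. The helper name and the trimmed-down struct are hypothetical;
only the mc_nmsrvals/mc_msrvalues pairing (mirroring xen_mc_logical_cpu_t /
struct mcinfo_msr from the public interface) and the MSR numbers are taken from
the patch.

    /*
     * Hypothetical consumer-side helper: scan the MSR values reported for one
     * logical CPU and return the PPIN, if the hypervisor included one.
     */
    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_PPIN       0x0000004fULL   /* Intel, as defined in the patch */
    #define MSR_AMD_PPIN   0xc00102f1ULL   /* AMD, as defined in the patch */

    /* Mirrors struct mcinfo_msr from the public interface. */
    struct mcinfo_msr {
        uint64_t reg;
        uint64_t value;
    };

    /* Illustrative subset of xen_mc_logical_cpu_t. */
    struct logical_cpu_msrs {
        uint32_t mc_nmsrvals;
        struct mcinfo_msr mc_msrvalues[8]; /* __MC_MSR_ARRAYSIZE */
    };

    static bool get_ppin(const struct logical_cpu_msrs *cpu, uint64_t *ppin)
    {
        for ( unsigned int i = 0; i < cpu->mc_nmsrvals; ++i )
            if ( cpu->mc_msrvalues[i].reg == MSR_PPIN ||
                 cpu->mc_msrvalues[i].reg == MSR_AMD_PPIN )
            {
                *ppin = cpu->mc_msrvalues[i].value;
                return true;
            }

        /* No enabled PPIN, or the hypervisor predates this change. */
        return false;
    }

A caller cannot rely on the PPIN entry being present: it is only appended when
the CPU advertises the feature and the enable succeeded (or was not locked
out), so scanning by MSR number rather than by fixed array index is the safe
approach.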