Message ID | 56FBAB9D02000078000E12C2@prv-mh.provo.novell.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
>>> On 30.03.16 at 10:34, <JBeulich@suse.com> wrote: > Some SKL-H configurations require "max_cstate=7" to boot. > While that is an effective workaround, it disables C10. > > This patch detects the problematic configuration, > and disables C8 and C9, keeping C10 enabled. > > Note that enabling SGX in BIOS SETUP can also prevent this issue, > if the system BIOS provides that option. > > https://bugzilla.kernel.org/show_bug.cgi?id=109081 > "Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7" > > Signed-off-by: Len Brown <len.brown@intel.com> [Linux commit: d70e28f57e14a481977436695b0c9ba165472431] > Adjust to Xen infrastructure. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/cpu/mwait-idle.c > +++ b/xen/arch/x86/cpu/mwait-idle.c > @@ -60,7 +60,7 @@ > #include <asm/msr.h> > #include <acpi/cpufreq/cpufreq.h> > > -#define MWAIT_IDLE_VERSION "0.4" > +#define MWAIT_IDLE_VERSION "0.4.1" > #undef PREFIX > #define PREFIX "mwait-idle: " > > @@ -100,6 +100,7 @@ static const struct cpuidle_state { > unsigned int target_residency; /* in US */ > } *cpuidle_state_table; > > +#define CPUIDLE_FLAG_DISABLED 0x1 > /* > * Set this flag for states where the HW flushes the TLB for us > * and so we don't need cross-calls to keep it consistent. > @@ -477,7 +478,7 @@ static const struct cpuidle_state bdw_cs > {} > }; > > -static const struct cpuidle_state skl_cstates[] = { > +static struct cpuidle_state skl_cstates[] = { > { > .name = "C1-SKL", > .flags = MWAIT2flg(0x00), > @@ -781,34 +782,84 @@ static const struct x86_cpu_id intel_idl > }; > > /* > - * mwait_idle_state_table_update() > - * > - * Update the default state_table for this CPU-id > + * ivt_idle_state_table_update(void) > * > - * Currently used to access tuned IVT multi-socket targets > + * Tune IVT multi-socket targets > * Assumption: num_sockets == (max_package_num + 1) > */ > -static void __init mwait_idle_state_table_update(void) > +static void __init ivt_idle_state_table_update(void) > { > /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ > - if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ > - unsigned int cpu, max_apicid = boot_cpu_physical_apicid; > + unsigned int cpu, max_apicid = boot_cpu_physical_apicid; > > - for_each_present_cpu(cpu) > - if (max_apicid < x86_cpu_to_apicid[cpu]) > - max_apicid = x86_cpu_to_apicid[cpu]; > - switch (apicid_to_socket(max_apicid)) { > - case 0: case 1: > - /* 1 and 2 socket systems use default ivt_cstates */ > - break; > - case 2: case 3: > - cpuidle_state_table = ivt_cstates_4s; > - break; > - default: > - cpuidle_state_table = ivt_cstates_8s; > - break; > - } > + for_each_present_cpu(cpu) > + if (max_apicid < x86_cpu_to_apicid[cpu]) > + max_apicid = x86_cpu_to_apicid[cpu]; > + switch (apicid_to_socket(max_apicid)) { > + case 0: case 1: > + /* 1 and 2 socket systems use default ivt_cstates */ > + break; > + case 2: case 3: > + cpuidle_state_table = ivt_cstates_4s; > + break; > + default: > + cpuidle_state_table = ivt_cstates_8s; > + break; > + } > +} > + > +/* > + * sklh_idle_state_table_update(void) > + * > + * On SKL-H (model 0x5e) disable C8 and C9 if: > + * C10 is enabled and SGX disabled > + */ > +static void sklh_idle_state_table_update(void) > +{ > + u64 msr; > + > + /* if PC10 disabled via cmdline max_cstate=7 or shallower */ > + if (max_cstate <= 7) > + return; > + > + /* if PC10 not present in CPUID.MWAIT.EDX */ > + if ((mwait_substates & (MWAIT_CSTATE_MASK << 28)) == 0) > + return; > + > + rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); > + > + /* PC10 is not enabled in PKG C-state limit */ > + if ((msr & 0xF) != 8) > + return; > + > + /* if SGX is present */ > + if (boot_cpu_has(X86_FEATURE_SGX)) { > + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); > + > + /* if SGX is enabled */ > + if (msr & (1 << 18)) > + return; > } > + > + skl_cstates[5].flags |= CPUIDLE_FLAG_DISABLED; /* C8-SKL */ > + skl_cstates[6].flags |= CPUIDLE_FLAG_DISABLED; /* C9-SKL */ > +} > + > +/* > + * mwait_idle_state_table_update() > + * > + * Update the default state_table for this CPU-id > + */ > +static void __init mwait_idle_state_table_update(void) > +{ > + switch (boot_cpu_data.x86_model) { > + case 0x3e: /* IVT */ > + ivt_idle_state_table_update(); > + break; > + case 0x5e: /* SKL-H */ > + sklh_idle_state_table_update(); > + break; > + } > } > > static int __init mwait_idle_probe(void) > @@ -897,6 +948,14 @@ static int mwait_idle_cpu_init(struct no > if (num_substates == 0) > continue; > > + /* if state marked as disabled, skip it */ > + if (cpuidle_state_table[cstate].flags & > + CPUIDLE_FLAG_DISABLED) { > + printk(XENLOG_DEBUG PREFIX "state %s is disabled", > + cpuidle_state_table[cstate].name); > + continue; > + } > + > if (dev->count >= ACPI_PROCESSOR_MAX_POWER) { > printk(PREFIX "max C-state count of %u reached\n", > ACPI_PROCESSOR_MAX_POWER); > --- a/xen/include/asm-x86/msr-index.h > +++ b/xen/include/asm-x86/msr-index.h > @@ -288,6 +288,7 @@ > #define MSR_IA32_PLATFORM_ID 0x00000017 > #define MSR_IA32_EBL_CR_POWERON 0x0000002a > #define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c > +#define MSR_IA32_FEATURE_CONTROL 0x0000003a > #define MSR_IA32_TSC_ADJUST 0x0000003b > > #define MSR_IA32_APICBASE 0x0000001b > --- a/xen/include/public/arch-x86/cpufeatureset.h > +++ b/xen/include/public/arch-x86/cpufeatureset.h > @@ -186,6 +186,7 @@ > /* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */ > XEN_CPUFEATURE(FSGSBASE, 5*32+ 0) /* {RD,WR}{FS,GS}BASE instructions > */ > XEN_CPUFEATURE(TSC_ADJUST, 5*32+ 1) /* TSC_ADJUST MSR available */ > +XEN_CPUFEATURE(SGX, 5*32+ 2) /* Software Guard extensions */ > XEN_CPUFEATURE(BMI1, 5*32+ 3) /* 1st bit manipulation extensions > */ > XEN_CPUFEATURE(HLE, 5*32+ 4) /* Hardware Lock Elision */ > XEN_CPUFEATURE(AVX2, 5*32+ 5) /* AVX2 instructions */
--- a/xen/arch/x86/cpu/mwait-idle.c +++ b/xen/arch/x86/cpu/mwait-idle.c @@ -60,7 +60,7 @@ #include <asm/msr.h> #include <acpi/cpufreq/cpufreq.h> -#define MWAIT_IDLE_VERSION "0.4" +#define MWAIT_IDLE_VERSION "0.4.1" #undef PREFIX #define PREFIX "mwait-idle: " @@ -100,6 +100,7 @@ static const struct cpuidle_state { unsigned int target_residency; /* in US */ } *cpuidle_state_table; +#define CPUIDLE_FLAG_DISABLED 0x1 /* * Set this flag for states where the HW flushes the TLB for us * and so we don't need cross-calls to keep it consistent. @@ -477,7 +478,7 @@ static const struct cpuidle_state bdw_cs {} }; -static const struct cpuidle_state skl_cstates[] = { +static struct cpuidle_state skl_cstates[] = { { .name = "C1-SKL", .flags = MWAIT2flg(0x00), @@ -781,34 +782,84 @@ static const struct x86_cpu_id intel_idl }; /* - * mwait_idle_state_table_update() - * - * Update the default state_table for this CPU-id + * ivt_idle_state_table_update(void) * - * Currently used to access tuned IVT multi-socket targets + * Tune IVT multi-socket targets * Assumption: num_sockets == (max_package_num + 1) */ -static void __init mwait_idle_state_table_update(void) +static void __init ivt_idle_state_table_update(void) { /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ - if (boot_cpu_data.x86_model == 0x3e) { /* IVT */ - unsigned int cpu, max_apicid = boot_cpu_physical_apicid; + unsigned int cpu, max_apicid = boot_cpu_physical_apicid; - for_each_present_cpu(cpu) - if (max_apicid < x86_cpu_to_apicid[cpu]) - max_apicid = x86_cpu_to_apicid[cpu]; - switch (apicid_to_socket(max_apicid)) { - case 0: case 1: - /* 1 and 2 socket systems use default ivt_cstates */ - break; - case 2: case 3: - cpuidle_state_table = ivt_cstates_4s; - break; - default: - cpuidle_state_table = ivt_cstates_8s; - break; - } + for_each_present_cpu(cpu) + if (max_apicid < x86_cpu_to_apicid[cpu]) + max_apicid = x86_cpu_to_apicid[cpu]; + switch (apicid_to_socket(max_apicid)) { + case 0: case 1: + /* 1 and 2 socket systems use default ivt_cstates */ + break; + case 2: case 3: + cpuidle_state_table = ivt_cstates_4s; + break; + default: + cpuidle_state_table = ivt_cstates_8s; + break; + } +} + +/* + * sklh_idle_state_table_update(void) + * + * On SKL-H (model 0x5e) disable C8 and C9 if: + * C10 is enabled and SGX disabled + */ +static void sklh_idle_state_table_update(void) +{ + u64 msr; + + /* if PC10 disabled via cmdline max_cstate=7 or shallower */ + if (max_cstate <= 7) + return; + + /* if PC10 not present in CPUID.MWAIT.EDX */ + if ((mwait_substates & (MWAIT_CSTATE_MASK << 28)) == 0) + return; + + rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); + + /* PC10 is not enabled in PKG C-state limit */ + if ((msr & 0xF) != 8) + return; + + /* if SGX is present */ + if (boot_cpu_has(X86_FEATURE_SGX)) { + rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); + + /* if SGX is enabled */ + if (msr & (1 << 18)) + return; } + + skl_cstates[5].flags |= CPUIDLE_FLAG_DISABLED; /* C8-SKL */ + skl_cstates[6].flags |= CPUIDLE_FLAG_DISABLED; /* C9-SKL */ +} + +/* + * mwait_idle_state_table_update() + * + * Update the default state_table for this CPU-id + */ +static void __init mwait_idle_state_table_update(void) +{ + switch (boot_cpu_data.x86_model) { + case 0x3e: /* IVT */ + ivt_idle_state_table_update(); + break; + case 0x5e: /* SKL-H */ + sklh_idle_state_table_update(); + break; + } } static int __init mwait_idle_probe(void) @@ -897,6 +948,14 @@ static int mwait_idle_cpu_init(struct no if (num_substates == 0) continue; + /* if state marked as disabled, skip it */ + if (cpuidle_state_table[cstate].flags & + CPUIDLE_FLAG_DISABLED) { + printk(XENLOG_DEBUG PREFIX "state %s is disabled", + cpuidle_state_table[cstate].name); + continue; + } + if (dev->count >= ACPI_PROCESSOR_MAX_POWER) { printk(PREFIX "max C-state count of %u reached\n", ACPI_PROCESSOR_MAX_POWER); --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -288,6 +288,7 @@ #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_IA32_EBC_FREQUENCY_ID 0x0000002c +#define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_APICBASE 0x0000001b --- a/xen/include/public/arch-x86/cpufeatureset.h +++ b/xen/include/public/arch-x86/cpufeatureset.h @@ -186,6 +186,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */ XEN_CPUFEATURE(FSGSBASE, 5*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */ XEN_CPUFEATURE(TSC_ADJUST, 5*32+ 1) /* TSC_ADJUST MSR available */ +XEN_CPUFEATURE(SGX, 5*32+ 2) /* Software Guard extensions */ XEN_CPUFEATURE(BMI1, 5*32+ 3) /* 1st bit manipulation extensions */ XEN_CPUFEATURE(HLE, 5*32+ 4) /* Hardware Lock Elision */ XEN_CPUFEATURE(AVX2, 5*32+ 5) /* AVX2 instructions */