@@ -114,6 +114,7 @@ void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void smp_kick_mwait_play_dead(void);
+void smp_set_mwait_play_dead_hint(unsigned int hint); /* Set the MWAIT hint that mwait_play_dead() uses instead of its CPUID-derived default. */
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
@@ -164,6 +165,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return (struct cpumask *)cpumask_of(0);
}
+
+static inline void smp_set_mwait_play_dead_hint(unsigned int hint) { } /* No-op stub: @hint is ignored (presumably the !CONFIG_SMP branch -- confirm against the enclosing #if). */
#endif /* CONFIG_SMP */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
@@ -127,6 +127,9 @@ int __read_mostly __max_smt_threads = 1;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
+#define PLAY_DEAD_MWAIT_HINT_UNSET 0U /* Sentinel: no hint supplied, so mwait_play_dead() falls back to get_deepest_mwait_hint(). NOTE(review): a caller passing 0 is treated as unset too -- confirm that is intended. */
+static unsigned int __read_mostly play_dead_mwait_hint = PLAY_DEAD_MWAIT_HINT_UNSET; /* Written by smp_set_mwait_play_dead_hint(), read by mwait_play_dead(). */
+
int arch_update_cpu_topology(void)
{
int retval = x86_topology_update;
@@ -1270,6 +1273,11 @@ void play_dead_common(void)
local_irq_disable();
}
+void smp_set_mwait_play_dead_hint(unsigned int hint) /* Record @hint for mwait_play_dead(); PLAY_DEAD_MWAIT_HINT_UNSET restores the CPUID-derived default. */
+{
+ WRITE_ONCE(play_dead_mwait_hint, hint); /* Matches the READ_ONCE() of the same variable in mwait_play_dead(). */
+}
+
/* Computes mwait hint for the deepest mwait hint based on cpuid leaf 0x5 */
static inline unsigned int get_deepest_mwait_hint(void)
{
@@ -1322,7 +1330,9 @@ static inline void mwait_play_dead(void)
if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;
- hint = get_deepest_mwait_hint();
+ hint = READ_ONCE(play_dead_mwait_hint);
+ if (hint == PLAY_DEAD_MWAIT_HINT_UNSET)
+ hint = get_deepest_mwait_hint();
/* Set up state for the kexec() hack below */
md->status = CPUDEAD_MWAIT_WAIT;
The current implementation for looking up the mwait hint for the deepest
C-state assumes that the substate hints are contiguous in the range
[0, NUM_SUBSTATES-1]. While that is correct on most Intel x86 platforms,
it is not architectural and may not result in reaching the most optimized
idle state on some of them.

For example, Intel's Sierra Forest reports two C6 substates in cpuid leaf 5:

    C6S  (hint 0x22)
    C6SP (hint 0x23)

Hints 0x20 and 0x21 are skipped entirely, causing the current
implementation to compute the wrong hint when looking for the deepest
C-state for an offlined CPU to enter. As a result, a package with an
offlined CPU can never reach PC6.

Allow the idle driver to communicate the deepest idle C-state to the x86
offline code.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
---
 arch/x86/include/asm/smp.h |  3 +++
 arch/x86/kernel/smpboot.c  | 12 +++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)