diff mbox series

[v4,2/4] x86/mwait: Add support for idle via umwait

Message ID 20230710093100.918337-3-dedekind1@gmail.com (mailing list archive)
State Changes Requested, archived
Headers show
Series Sapphire Rapids C0.x idle states support | expand

Commit Message

Artem Bityutskiy July 10, 2023, 9:30 a.m. UTC
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

On Intel platforms, C-states are requested using the 'monitor/mwait'
instruction pair, as implemented in 'mwait_idle_with_hints()'. This
mechanism allows for entering C1 and deeper C-states.

Sapphire Rapids Xeon supports new idle states - C0.1 and C0.2 (hereafter
C0.x). These idle states have lower latency compared to C1, and can be
requested with either the 'tpause' or the 'umwait' instruction.

The Linux kernel already supports the 'tpause' instruction and uses it in
delay functions like 'udelay()'. Add 'umwait' support by implementing
the 'umwait_idle()' function. This function is analogous to
'mwait_idle_with_hints()', but instead of requesting a C-state with
'monitor/mwait', it requests C0.x with 'umonitor/umwait'.

Tested with both gcc/binutils and clang/llvm.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 arch/x86/include/asm/mwait.h | 67 ++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 03bef2bc28d4..48210f4d7c77 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -141,4 +141,71 @@  static inline void __tpause(u32 ecx, u32 edx, u32 eax)
 	}
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Monitor a memory address at 'rcx' using the 'umonitor' instruction.
+ *
+ * The address is handed to the instruction in the RCX register (the "c"
+ * input constraint). Nothing is returned. See '__umwait()' for the
+ * instruction that waits on the monitored address.
+ */
+static __always_inline void __umonitor(const void *rcx)
+{
+	/*
+	 * "umonitor %rcx". When CONFIG_AS_TPAUSE is not enabled, the
+	 * instruction is emitted as raw opcode bytes instead of by mnemonic
+	 * (presumably because the assembler may not know the mnemonic then).
+	 */
+	if (IS_ENABLED(CONFIG_AS_TPAUSE)) {
+		asm volatile("umonitor %%rcx\n"
+			     :
+			     : "c"(rcx));
+	} else {
+		asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf1\t\n"
+			     :
+			     : "c"(rcx));
+	}
+}
+
+/*
+ * Same as '__tpause()', but uses the 'umwait' instruction. It is very
+ * similar to 'tpause', but also breaks out if the data at the address
+ * monitored with 'umonitor' is modified.
+ *
+ * The three arguments are loaded into the ECX, EDX and EAX registers
+ * respectively. 'umwait_idle()' passes the requested state in 'ecx' and
+ * the upper/lower 32 bits of the TSC deadline in 'edx'/'eax'.
+ */
+static __always_inline void __umwait(u32 ecx, u32 edx, u32 eax)
+{
+	/*
+	 * "umwait %ecx, %edx, %eax;" - only %ecx is spelled out in the
+	 * mnemonic form; %edx and %eax reach the instruction through the
+	 * "d" and "a" input constraints. Raw opcode bytes are emitted
+	 * instead when CONFIG_AS_TPAUSE is not enabled.
+	 */
+	if (IS_ENABLED(CONFIG_AS_TPAUSE)) {
+		asm volatile("umwait %%ecx\n"
+			     :
+			     : "c"(ecx), "d"(edx), "a"(eax));
+	} else {
+		asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf1\t\n"
+			     :
+			     : "c"(ecx), "d"(edx), "a"(eax));
+	}
+}
+
+/*
+ * Enter C0.1 or C0.2 state and stay there until an event happens (an interrupt
+ * or 'need_resched()'), the explicit deadline is reached, or the implicit
+ * global limit is reached.
+ *
+ * The deadline is the absolute TSC value to exit the idle state at. If it
+ * exceeds the global limit in the 'IA32_UMWAIT_CONTROL' register, the global
+ * limit prevails, and the idle state is exited earlier than the deadline.
+ *
+ * @deadline: absolute TSC value; it is split into its upper and lower 32 bits,
+ *            which become the 'edx' and 'eax' arguments of '__umwait()'.
+ * @state:    passed unchanged as the 'ecx' argument of '__umwait()'. Per the
+ *            Intel SDM, bit 0 of ECX selects C0.1 (1) or C0.2 (0).
+ *
+ * The wait is attempted only if 'current_set_polling_and_test()' returns zero
+ * (presumably meaning no reschedule is pending yet - confirm against its
+ * definition). The thread flags word is armed with '__umonitor()' first, and
+ * 'need_resched()' is checked once more before '__umwait()', so that a flag
+ * update which landed before the monitor was armed is not slept through.
+ * 'current_clr_polling()' is called on every path before returning.
+ */
+static __always_inline void umwait_idle(u64 deadline, u32 state)
+{
+	if (!current_set_polling_and_test()) {
+		u32 eax, edx;
+
+		eax = lower_32_bits(deadline);
+		edx = upper_32_bits(deadline);
+
+		__umonitor(&current_thread_info()->flags);
+		if (!need_resched())
+			__umwait(state, edx, eax);
+	}
+	current_clr_polling();
+}
+#else /* CONFIG_X86_64 */
+/*
+ * !CONFIG_X86_64 stub: the 'umonitor'/'umwait' helpers are only built for
+ * CONFIG_X86_64, so this variant never idles. It warns once and returns.
+ * Both arguments are ignored.
+ */
+static __always_inline void umwait_idle(u64 deadline, u32 state)
+{
+	/* Terminate the message with a newline, as kernel log messages should. */
+	WARN_ONCE(1, "umwait CPU instruction is not supported\n");
+}
+#endif /* !CONFIG_X86_64 */
+
 #endif /* _ASM_X86_MWAIT_H */