@@ -299,6 +299,7 @@ static inline void indirect_branch_prediction_barrier(void)
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
@@ -77,6 +77,12 @@ void write_spec_ctrl_current(u64 val, bool force)
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
+u64 spec_ctrl_current(void)
+{
+ return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
+
/*
* The vendor and possibly platform specific bits which can be modified in
* x86_spec_ctrl_base.
@@ -46,11 +46,13 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
@@ -97,6 +99,12 @@ static struct cpuidle_state *cpuidle_state_table;
*/
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
@@ -107,6 +115,24 @@ static struct cpuidle_state *cpuidle_state_table;
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+ return ret;
+}
+
/*
* States are indexed by the cstate number,
* which is also the index into the MWAIT hint array.
@@ -605,7 +631,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@@ -613,7 +639,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@@ -621,7 +647,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@@ -629,7 +655,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@@ -637,7 +663,7 @@ static struct cpuidle_state skl_cstates[] = {
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@@ -666,7 +692,7 @@ static struct cpuidle_state skx_cstates[] = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@@ -1370,6 +1396,11 @@ static void __init intel_idle_cpuidle_driver_init(void)
drv->states[drv->state_count] = /* structure copy */
cpuidle_state_table[cstate];
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
drv->state_count += 1;
}