@@ -82,6 +82,8 @@ extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_kvm_prepare(void);
+extern void fpsimd_idle_enter(void);
+
struct cpu_fp_state {
struct user_fpsimd_state *st;
void *sve_state;
@@ -2126,6 +2126,20 @@ static void __init fpsimd_pm_init(void)
static inline void fpsimd_pm_init(void) { }
#endif /* CONFIG_CPU_PM */
+void fpsimd_idle_enter(void)
+{
+ /*
+ * Leaving SME enabled may leave this core contending with
+ * other cores if we have a SMCU, so disable it whenever we
+ * enter idle. Only do this if streaming mode or ZA is actually
+ * enabled, to avoid overhead in cases where we don't enter a
+ * low enough power state to lose register state.
+ */
+ if (system_supports_sme() &&
+ (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)))
+ fpsimd_save_and_flush_cpu_state();
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
@@ -68,6 +68,11 @@ EXPORT_SYMBOL(__stack_chk_guard);
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
+void arch_cpu_idle_enter(void)
+{
+ fpsimd_idle_enter(); /* don't leave SME enabled on a CPU that is about to idle */
+}
+
#ifdef CONFIG_HOTPLUG_CPU
void __noreturn arch_cpu_idle_dead(void)
{
When we enter the kernel we currently don't update any of the floating point register state until either something else uses floating point or we get a CPU_PM_ENTER notification during suspend or cpuidle. This means that for a system which has been configured with both suspend and cpuidle disabled we will leave whatever floating point state was loaded in the registers present while a CPU is idling. For SME this may cause an idling CPU to interfere with the operation of other CPUs, since SME may be implemented as a SMCU shared between multiple CPUs. Leaving ZA or especially streaming mode enabled may be taken by the hardware as an indication that SME is in active use by the CPU and cause resources to be allocated to it at the expense of other CPUs. Since we exit streaming mode on entering syscalls we are unlikely to be idling the CPU in streaming mode due to a blocking syscall; most likely it will be a task being scheduled to a different CPU or potentially fault handling. The former will result in the loaded state being invalidated, and the latter is a slow path anyway. For ZA we do allow it to be enabled during syscalls so we could see a blocking syscall for that, although the strong recommendation is that it be disabled as much as possible so it should also be an uncommon case. Add an arch_cpu_idle_enter() implementation which disables SME if it is active when we idle the CPU, ensuring that we don't create any spurious contention even if cpuidle is not enabled. Signed-off-by: Mark Brown <broonie@kernel.org> --- Changes in v2: - Rebase onto v6.11-rc1. - Tweak commit message. - Link to v1: https://lore.kernel.org/r/20240618-arm64-sme-no-cpuidle-v1-1-1de872e1691f@kernel.org --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kernel/process.c | 5 +++++ 3 files changed, 21 insertions(+) --- base-commit: 8400291e289ee6b2bf9779ff1c83a291501f017b change-id: 20240617-arm64-sme-no-cpuidle-8c68ef86e370 Best regards,