diff mbox series

[v3] arm64: mte: avoid clearing PSTATE.TCO on entry unless necessary

Message ID 20220122010250.251885-1-pcc@google.com (mailing list archive)
State New, archived
Headers show
Series [v3] arm64: mte: avoid clearing PSTATE.TCO on entry unless necessary | expand

Commit Message

Peter Collingbourne Jan. 22, 2022, 1:02 a.m. UTC
On some microarchitectures, clearing PSTATE.TCO is expensive. Clearing
TCO is only necessary if in-kernel MTE is enabled, or if MTE is
enabled in the userspace process in synchronous (or, soon, asymmetric)
mode, because we do not report uaccess faults to userspace in none
or asynchronous modes. Therefore, adjust the kernel entry code to
clear TCO only if necessary.

Because it is now possible to switch to a task in which TCO needs to
be clear from a task in which TCO is set, we also need to do the same
thing on task switch.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I52d82a580bd0500d420be501af2c35fa8c90729e
---
v3:
- switch to a C implementation

v2:
- do the same thing in cpu_switch_to()

 arch/arm64/include/asm/mte.h     | 19 +++++++++++++++++++
 arch/arm64/kernel/entry-common.c |  3 +++
 arch/arm64/kernel/entry.S        |  7 -------
 arch/arm64/kernel/mte.c          |  1 +
 4 files changed, 23 insertions(+), 7 deletions(-)

Comments

Catalin Marinas Jan. 24, 2022, 11:45 a.m. UTC | #1
On Fri, Jan 21, 2022 at 05:02:50PM -0800, Peter Collingbourne wrote:
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 075539f5f1c8..5352db4c0f45 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -11,7 +11,9 @@
>  #ifndef __ASSEMBLY__
>  
>  #include <linux/bitfield.h>
> +#include <linux/kasan.h>
>  #include <linux/page-flags.h>
> +#include <linux/sched.h>
>  #include <linux/types.h>
>  
>  #include <asm/pgtable-types.h>
> @@ -86,6 +88,23 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
>  
>  #endif /* CONFIG_ARM64_MTE */
>  
> +static inline void mte_disable_tco_entry(struct task_struct *task)
> +{
> +	/*
> +	 * Re-enable tag checking (TCO set on exception entry). This is only
> +	 * necessary if MTE is enabled in either the kernel or the userspace
> +	 * task in synchronous mode. With MTE disabled in the kernel and
> +	 * disabled or asynchronous in userspace, tag check faults (including in
> +	 * uaccesses) are not reported, therefore there is no need to re-enable
> +	 * checking. This is beneficial on microarchitectures where re-enabling
> +	 * TCO is expensive.
> +	 */

I'd add a note here that the 1ULL << SCTLR_EL1_TCF0_SHIFT is meant to
check for both synchronous and asymmetric modes even if we don't have
the latter supporting the user yet. We do have the definitions already.

> +	if (kasan_hw_tags_enabled() ||
> +	    (system_supports_mte() &&
> +	     (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))))
> +		asm volatile(SET_PSTATE_TCO(0));
> +}

Does it make a difference in code generation if you place a:

	if (!system_supports_mte())
		return;

at the beginning of the function (and remove the subsequent check)? It's
probably also easier to read, though the code generation depends on the
likely/unlikely choices for the static branches involved.

> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index f418ebc65f95..5345587f3384 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -252,6 +252,7 @@ void mte_thread_switch(struct task_struct *next)
>  
>  	mte_update_sctlr_user(next);
>  	mte_update_gcr_excl(next);
> +	mte_disable_tco_entry(next);

Maybe a one-line comment here that TCO may not have been disabled on
exception entry for the current task.

Otherwise it looks good to me:

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Peter Collingbourne Jan. 25, 2022, 12:59 a.m. UTC | #2
On Mon, Jan 24, 2022 at 3:45 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Fri, Jan 21, 2022 at 05:02:50PM -0800, Peter Collingbourne wrote:
> > diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> > index 075539f5f1c8..5352db4c0f45 100644
> > --- a/arch/arm64/include/asm/mte.h
> > +++ b/arch/arm64/include/asm/mte.h
> > @@ -11,7 +11,9 @@
> >  #ifndef __ASSEMBLY__
> >
> >  #include <linux/bitfield.h>
> > +#include <linux/kasan.h>
> >  #include <linux/page-flags.h>
> > +#include <linux/sched.h>
> >  #include <linux/types.h>
> >
> >  #include <asm/pgtable-types.h>
> > @@ -86,6 +88,23 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
> >
> >  #endif /* CONFIG_ARM64_MTE */
> >
> > +static inline void mte_disable_tco_entry(struct task_struct *task)
> > +{
> > +     /*
> > +      * Re-enable tag checking (TCO set on exception entry). This is only
> > +      * necessary if MTE is enabled in either the kernel or the userspace
> > +      * task in synchronous mode. With MTE disabled in the kernel and
> > +      * disabled or asynchronous in userspace, tag check faults (including in
> > +      * uaccesses) are not reported, therefore there is no need to re-enable
> > +      * checking. This is beneficial on microarchitectures where re-enabling
> > +      * TCO is expensive.
> > +      */
>
> I'd add a note here that the 1ULL << SCTLR_EL1_TCF0_SHIFT is meant to
> check for both synchronous and asymmetric modes even if we don't have
> the latter supporting the user yet. We do have the definitions already.

Done in v4.

> > +     if (kasan_hw_tags_enabled() ||
> > +         (system_supports_mte() &&
> > +          (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))))
> > +             asm volatile(SET_PSTATE_TCO(0));
> > +}
>
> Does it make a difference in code generation if you place a:
>
>         if (!system_supports_mte())
>                 return;
>
> at the beginning of the function (and remove the subsequent check)? It's
> probably also easier to read, though the code generation depends on the
> likely/unlikely choices for the static branches involved.

Yes and this ends up making the patch cause a small speedup on the
DragonBoard: 245.1ns to 244.5ns on the small cores and 151.4ns to
148.4ns on the large cores. The numbers on the MTE-enabled hardware
don't change too much.

> > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > index f418ebc65f95..5345587f3384 100644
> > --- a/arch/arm64/kernel/mte.c
> > +++ b/arch/arm64/kernel/mte.c
> > @@ -252,6 +252,7 @@ void mte_thread_switch(struct task_struct *next)
> >
> >       mte_update_sctlr_user(next);
> >       mte_update_gcr_excl(next);
> > +     mte_disable_tco_entry(next);
>
> Maybe a one-line comment here that TCO may not have been disabled on
> exception entry for the current task.

Done in v4.

> Otherwise it looks good to me:
>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>

Thanks.

Peter
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 075539f5f1c8..5352db4c0f45 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -11,7 +11,9 @@ 
 #ifndef __ASSEMBLY__
 
 #include <linux/bitfield.h>
+#include <linux/kasan.h>
 #include <linux/page-flags.h>
+#include <linux/sched.h>
 #include <linux/types.h>
 
 #include <asm/pgtable-types.h>
@@ -86,6 +88,23 @@  static inline int mte_ptrace_copy_tags(struct task_struct *child,
 
 #endif /* CONFIG_ARM64_MTE */
 
+static inline void mte_disable_tco_entry(struct task_struct *task)
+{
+	/*
+	 * Re-enable tag checking (TCO set on exception entry). This is only
+	 * necessary if MTE is enabled in either the kernel or the userspace
+	 * task in synchronous mode. With MTE disabled in the kernel and
+	 * disabled or asynchronous in userspace, tag check faults (including in
+	 * uaccesses) are not reported, therefore there is no need to re-enable
+	 * checking. This is beneficial on microarchitectures where re-enabling
+	 * TCO is expensive.
+	 */
+	if (kasan_hw_tags_enabled() ||
+	    (system_supports_mte() &&
+	     (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))))
+		asm volatile(SET_PSTATE_TCO(0));
+}
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ef7fcefb96bd..7093b578e325 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,6 +6,7 @@ 
  */
 
 #include <linux/context_tracking.h>
+#include <linux/kasan.h>
 #include <linux/linkage.h>
 #include <linux/lockdep.h>
 #include <linux/ptrace.h>
@@ -56,6 +57,7 @@  static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
 {
 	__enter_from_kernel_mode(regs);
 	mte_check_tfsr_entry();
+	mte_disable_tco_entry(current);
 }
 
 /*
@@ -103,6 +105,7 @@  static __always_inline void __enter_from_user_mode(void)
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit_irqoff();
 	trace_hardirqs_off_finish();
+	mte_disable_tco_entry(current);
 }
 
 static __always_inline void enter_from_user_mode(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 772ec2ecf488..e1013a83d4f0 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -308,13 +308,6 @@  alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	msr_s	SYS_ICC_PMR_EL1, x20
 alternative_else_nop_endif
 
-	/* Re-enable tag checking (TCO set on exception entry) */
-#ifdef CONFIG_ARM64_MTE
-alternative_if ARM64_MTE
-	SET_PSTATE_TCO(0)
-alternative_else_nop_endif
-#endif
-
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index f418ebc65f95..5345587f3384 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -252,6 +252,7 @@  void mte_thread_switch(struct task_struct *next)
 
 	mte_update_sctlr_user(next);
 	mte_update_gcr_excl(next);
+	mte_disable_tco_entry(next);
 
 	/*
 	 * Check if an async tag exception occurred at EL1.