[v3] arm64: mte: optimize GCR_EL1 modification on kernel entry/exit

Message ID 20210709014941.2014210-1-pcc@google.com (mailing list archive)
State New, archived

Commit Message

Peter Collingbourne July 9, 2021, 1:49 a.m. UTC
Accessing GCR_EL1 and issuing an ISB can be expensive on some
microarchitectures. Although we must write to GCR_EL1, we can
restructure the code to avoid reading from it because the new value
can be derived entirely from the exclusion mask, which is already in
a GPR. Do so.
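
Concretely, the mte_set_gcr macro's read-modify-write sequence

	mrs_s	\tmp2, SYS_GCR_EL1
	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
	msr_s	SYS_GCR_EL1, \tmp2

becomes a write computed entirely from the mte_ctrl value already held
in a GPR (see the diff below):

	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
	msr_s	SYS_GCR_EL1, \tmp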

Furthermore, although an ISB is required in order to make this system
register update effective, and the same is true for PAC-related updates
to SCTLR_EL1 or APIAKey{Hi,Lo}_EL1, we issue two ISBs on machines
that support both features while we only need to issue one. To avoid
the unnecessary additional ISB, remove the ISBs from the PAC and
MTE-specific alternative blocks and add an ISB in a separate block
that is activated only if either feature is supported.
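
On the entry path this leaves a single trailing block (from the diff
below) where the one remaining ISB lives:

alternative_if ARM64_HAS_ADDRESS_AUTH_OR_MTE
	/*
	 * Any non-self-synchronizing system register updates required for
	 * kernel entry should be placed before this point. If necessary,
	 * the alternative condition should be adjusted.
	 */
	isb
alternative_else_nop_endif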

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
---
v3:
- go back to modifying on entry/exit; optimize that path instead

v2:
- rebase onto v9 of the tag checking mode preference series

 arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++++
 arch/arm64/kernel/entry.S      | 23 +++++++++++++----------
 arch/arm64/tools/cpucaps       |  1 +
 3 files changed, 33 insertions(+), 10 deletions(-)

Comments

Will Deacon July 13, 2021, 3:52 p.m. UTC | #1
On Thu, Jul 08, 2021 at 06:49:41PM -0700, Peter Collingbourne wrote:
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. Although we must write to GCR_EL1, we can
> restructure the code to avoid reading from it because the new value
> can be derived entirely from the exclusion mask, which is already in
> a GPR. Do so.
> 
> Furthermore, although an ISB is required in order to make this system
> register update effective, and the same is true for PAC-related updates
> to SCTLR_EL1 or APIAKey{Hi,Lo}_EL1, we issue two ISBs on machines
> that support both features while we only need to issue one. To avoid
> the unnecessary additional ISB, remove the ISBs from the PAC and
> MTE-specific alternative blocks and add an ISB in a separate block
> that is activated only if either feature is supported.

Sorry to be a pain, but can you split this into two patches, please? I
think you're making two distinct changes, and it would be easier to review
and discuss them separately (it would also be interesting to know the
relative performance improvement you get from them).

> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index efed2830d141..740e09ade2ea 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1717,6 +1717,20 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
>  }
>  #endif /* CONFIG_ARM64_PTR_AUTH */
>  
> +static bool has_address_auth_or_mte(const struct arm64_cpu_capabilities *entry,
> +				    int scope)
> +{
> +#ifdef CONFIG_ARM64_PTR_AUTH
> +	if (has_address_auth_metacap(entry, scope))
> +		return true;
> +#endif
> +#ifdef CONFIG_ARM64_MTE
> +	if (__system_matches_cap(ARM64_MTE))
> +		return true;
> +#endif
> +	return false;
> +}
> +
>  #ifdef CONFIG_ARM64_E0PD
>  static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
>  {
> @@ -2218,6 +2232,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_cpuid_feature,
>  		.min_field_value = 1,
>  	},
> +	{
> +		.capability = ARM64_HAS_ADDRESS_AUTH_OR_MTE,
> +		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
> +		.matches = has_address_auth_or_mte,
> +	},

I'd rather avoid adding a new cap for this, as these features are entirely
unrelated in the architecture and if we end up piling more combinations of
features in here in the future then I fear it will become quite unwieldy.

Instead, how about we just use a conditional branch alongside the existing
capabilities? E.g.


	alternative_if ARM64_MTE
		isb
		b	1f
	alternative_else_nop_endif
	alternative_if ARM64_HAS_ADDRESS_AUTH
		isb
	alternative_else_nop_endif
1:

?

Failing that, maybe you could use alternative_cb to avoid the new
capability, but I'm not sure it's worth it.

Will
Peter Collingbourne July 14, 2021, 1:37 a.m. UTC | #2
On Tue, Jul 13, 2021 at 8:52 AM Will Deacon <will@kernel.org> wrote:
>
> On Thu, Jul 08, 2021 at 06:49:41PM -0700, Peter Collingbourne wrote:
> > Accessing GCR_EL1 and issuing an ISB can be expensive on some
> > microarchitectures. Although we must write to GCR_EL1, we can
> > restructure the code to avoid reading from it because the new value
> > can be derived entirely from the exclusion mask, which is already in
> > a GPR. Do so.
> >
> > Furthermore, although an ISB is required in order to make this system
> > register update effective, and the same is true for PAC-related updates
> > to SCTLR_EL1 or APIAKey{Hi,Lo}_EL1, we issue two ISBs on machines
> > that support both features while we only need to issue one. To avoid
> > the unnecessary additional ISB, remove the ISBs from the PAC and
> > MTE-specific alternative blocks and add an ISB in a separate block
> > that is activated only if either feature is supported.
>
> Sorry to be a pain, but can you split this into two patches, please? I
> think you're making two distinct changes, and it would be easier to review
> and discuss them separately (it would also be interesting to know the
> relative performance improvement you get from them).

Fair enough, done.

> > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> > index efed2830d141..740e09ade2ea 100644
> > --- a/arch/arm64/kernel/cpufeature.c
> > +++ b/arch/arm64/kernel/cpufeature.c
> > @@ -1717,6 +1717,20 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
> >  }
> >  #endif /* CONFIG_ARM64_PTR_AUTH */
> >
> > +static bool has_address_auth_or_mte(const struct arm64_cpu_capabilities *entry,
> > +                                 int scope)
> > +{
> > +#ifdef CONFIG_ARM64_PTR_AUTH
> > +     if (has_address_auth_metacap(entry, scope))
> > +             return true;
> > +#endif
> > +#ifdef CONFIG_ARM64_MTE
> > +     if (__system_matches_cap(ARM64_MTE))
> > +             return true;
> > +#endif
> > +     return false;
> > +}
> > +
> >  #ifdef CONFIG_ARM64_E0PD
> >  static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
> >  {
> > @@ -2218,6 +2232,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> >               .matches = has_cpuid_feature,
> >               .min_field_value = 1,
> >       },
> > +     {
> > +             .capability = ARM64_HAS_ADDRESS_AUTH_OR_MTE,
> > +             .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
> > +             .matches = has_address_auth_or_mte,
> > +     },
>
> I'd rather avoid adding a new cap for this, as these features are entirely
> unrelated in the architecture and if we end up piling more combinations of
> features in here in the future then I fear it will become quite unwieldy.

The idea is that we wouldn't change this to be
ARM64_HAS_ADDRESS_AUTH_OR_MTE_OR_SOMENEWFEATURE but would instead
rename it to ARM64_REQUIRES_ISB_ON_ENTRY or something.

> Instead, how about we just use a conditional branch alongside the existing
> capabilities? E.g.
>
>
>         alternative_if ARM64_MTE
>                 isb
>                 b       1f
>         alternative_else_nop_endif
>         alternative_if ARM64_HAS_ADDRESS_AUTH
>                 isb
>         alternative_else_nop_endif
> 1:
>
> ?

That will work for now. I couldn't see a difference at 95% CI on my
hardware but I suspect that it will gradually get slower as more
features are added. It's something that we can solve later though,
e.g. using the new cap or the alternative_cb thing.
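
FWIW, the alternative_cb version would presumably look something like
the below (untested sketch; kernel_entry_isb_patch is a made-up name):

alternative_cb	kernel_entry_isb_patch
	isb
alternative_cb_end

with a boot-time callback that NOPs the ISB when neither feature is
present:

void __init kernel_entry_isb_patch(struct alt_instr *alt,
				   __le32 *origptr, __le32 *updptr,
				   int nr_inst)
{
	BUG_ON(nr_inst != 1);

	/* Keep the ISB only if some entry-time sysreg write needs it. */
	if (!system_supports_mte() && !system_supports_address_auth())
		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}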

Peter
Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index efed2830d141..740e09ade2ea 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1717,6 +1717,20 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
 }
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+static bool has_address_auth_or_mte(const struct arm64_cpu_capabilities *entry,
+				    int scope)
+{
+#ifdef CONFIG_ARM64_PTR_AUTH
+	if (has_address_auth_metacap(entry, scope))
+		return true;
+#endif
+#ifdef CONFIG_ARM64_MTE
+	if (__system_matches_cap(ARM64_MTE))
+		return true;
+#endif
+	return false;
+}
+
 #ifdef CONFIG_ARM64_E0PD
 static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
 {
@@ -2218,6 +2232,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 	},
+	{
+		.capability = ARM64_HAS_ADDRESS_AUTH_OR_MTE,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.matches = has_address_auth_or_mte,
+	},
 	{},
 };
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ce59280355c5..deb3ee45b018 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -175,15 +175,11 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro mte_set_gcr, tmp, tmp2
+	.macro mte_set_gcr, mte_ctrl, tmp
 #ifdef CONFIG_ARM64_MTE
-	/*
-	 * Calculate and set the exclude mask preserving
-	 * the RRND (bit[16]) setting.
-	 */
-	mrs_s	\tmp2, SYS_GCR_EL1
-	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
-	msr_s	SYS_GCR_EL1, \tmp2
+	ubfx	\tmp, \mte_ctrl, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
+	orr	\tmp, \tmp, #SYS_GCR_EL1_RRND
+	msr_s	SYS_GCR_EL1, \tmp
 #endif
 	.endm
 
@@ -195,7 +191,6 @@ alternative_else_nop_endif
 	ldr_l	\tmp, gcr_kernel_excl
 
 	mte_set_gcr \tmp, \tmp2
-	isb
 1:
 #endif
 	.endm
@@ -269,12 +264,20 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 	orr	x0, x0, SCTLR_ELx_ENIA
 	msr	sctlr_el1, x0
 2:
-	isb
 alternative_else_nop_endif
 #endif
 
 	mte_set_kernel_gcr x22, x23
 
+alternative_if ARM64_HAS_ADDRESS_AUTH_OR_MTE
+	/*
+	 * Any non-self-synchronizing system register updates required for
+	 * kernel entry should be placed before this point. If necessary,
+	 * the alternative condition should be adjusted.
+	 */
+	isb
+alternative_else_nop_endif
+
 	scs_load tsk, x20
 	.else
 	add	x21, sp, #PT_REGS_SIZE
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 21fbdda7086e..a938260ae5bc 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -8,6 +8,7 @@ HAS_32BIT_EL1
 HAS_ADDRESS_AUTH
 HAS_ADDRESS_AUTH_ARCH
 HAS_ADDRESS_AUTH_IMP_DEF
+HAS_ADDRESS_AUTH_OR_MTE
 HAS_AMU_EXTN
 HAS_ARMv8_4_TTL
 HAS_CACHE_DIC