diff mbox series

[5/5] arm64: lib: Use MOPS for copy_page() and clear_page()

Message ID 20240930161051.3777828-6-kristina.martsenko@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Use memory copy instructions in kernel routines | expand

Commit Message

Kristina Martsenko Sept. 30, 2024, 4:10 p.m. UTC
Similarly to what was done to the memcpy() routines, make copy_page()
and clear_page() also use the Armv8.8 FEAT_MOPS instructions.

Note: For copy_page() this uses the CPY* instructions instead of CPYF*,
as CPYF* doesn't allow src and dst to be equal. It's not clear whether
copy_page() needs to allow equal src and dst, but that case has worked
so far with the current implementation and there is no documentation
forbidding it.

Note: The unoptimized version of copy_page() in assembler.h is left
unchanged.

Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
---
 arch/arm64/lib/clear_page.S | 13 +++++++++++++
 arch/arm64/lib/copy_page.S  | 13 +++++++++++++
 2 files changed, 26 insertions(+)

Comments

Catalin Marinas Oct. 2, 2024, 3:37 p.m. UTC | #1
On Mon, Sep 30, 2024 at 05:10:51PM +0100, Kristina Martsenko wrote:
> Similarly to what was done to the memcpy() routines, make copy_page()
> and clear_page() also use the Armv8.8 FEAT_MOPS instructions.
> 
> Note: For copy_page() this uses the CPY* instructions instead of CPYF*
> as CPYF* doesn't allow src and dst to be equal. It's not clear if
> copy_page() needs to allow equal src and dst but it has worked so far
> with the current implementation and there is no documentation forbidding
> it.

When we get real hardware, if CPYF* is faster we should switch to the
CPYF* instructions. I wouldn't expect source and destination to be the
same, but we can add a check for that case.

>  SYM_FUNC_START(__pi_copy_page)
> +#ifdef CONFIG_AS_HAS_MOPS
> +	.arch_extension mops
> +alternative_if_not ARM64_HAS_MOPS
> +	b	.Lno_mops
> +alternative_else_nop_endif

Same comment as on the previous patch w.r.t. the branch.
diff mbox series

Patch

diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index ebde40e7fa2b..bd6f7d5eb6eb 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -15,6 +15,19 @@ 
  *	x0 - dest
  */
 SYM_FUNC_START(__pi_clear_page)
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+	b	.Lno_mops
+alternative_else_nop_endif
+
+	mov	x1, #PAGE_SIZE
+	setpn	[x0]!, x1!, xzr
+	setmn	[x0]!, x1!, xzr
+	seten	[x0]!, x1!, xzr
+	ret
+.Lno_mops:
+#endif
 	mrs	x1, dczid_el0
 	tbnz	x1, #4, 2f	/* Branch if DC ZVA is prohibited */
 	and	w1, w1, #0xf
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 6a56d7cf309d..e6374e7e5511 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,19 @@ 
  *	x1 - src
  */
 SYM_FUNC_START(__pi_copy_page)
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+	b	.Lno_mops
+alternative_else_nop_endif
+
+	mov	x2, #PAGE_SIZE
+	cpypwn	[x0]!, [x1]!, x2!
+	cpymwn	[x0]!, [x1]!, x2!
+	cpyewn	[x0]!, [x1]!, x2!
+	ret
+.Lno_mops:
+#endif
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]