--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -61,6 +61,10 @@ alternative_else_nop_endif
9999: x; \
_asm_extable_uaccess 9999b, l
+#define USER_CPY(l, uaccess_is_write, x...) \
+9999: x; \
+ _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
+
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,14 +17,27 @@
* Alignment fixed up by hardware.
*/
- .p2align 4
- // Alignment is for the loop, but since the prologue (including BTI)
- // is also 16 bytes we can keep any padding outside the function
SYM_FUNC_START(__arch_clear_user)
add x2, x0, x1
+
+#ifdef CONFIG_AS_HAS_MOPS
+ .arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+
+USER(9f, setpt [x0]!, x1!, xzr)
+USER(6f, setmt [x0]!, x1!, xzr)
+USER(6f, setet [x0]!, x1!, xzr)
+ mov x0, #0
+ ret
+.Lno_mops:
+#endif
+
subs x1, x1, #8
b.mi 2f
-1:
+ .p2align 4
+1:
USER(9f, sttr xzr, [x0])
add x0, x0, #8
subs x1, x1, #8
@@ -47,6 +60,10 @@ USER(7f, sttrb wzr, [x2, #-1])
ret
// Exception fixups
+6: b.cs 9f
+ // Registers are in Option A format
+ add x0, x0, x1
+ b 9f
7: sub x0, x2, #5 // Adjust for faulting on the final byte...
8: add x0, x0, #4 // ...or the second word of the 4-7 byte case
9: sub x0, x2, x0
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -52,6 +52,13 @@
stp \reg1, \reg2, [\ptr], \val
.endm
+ .macro cpy1 dst, src, count
+ .arch_extension mops
+ USER_CPY(9997f, 0, cpyfprt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 0, cpyfmrt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 0, cpyfert [\dst]!, [\src]!, \count!)
+ .endm
+
end .req x5
srcin .req x15
SYM_FUNC_START(__arch_copy_from_user)
@@ -62,6 +69,9 @@ SYM_FUNC_START(__arch_copy_from_user)
ret
// Exception fixups
+9996: b.cs 9997f
+ // Registers are in Option A format
+ add dst, dst, count
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
--- a/arch/arm64/lib/copy_template.S
+++ b/arch/arm64/lib/copy_template.S
@@ -40,6 +40,16 @@ D_l .req x13
D_h .req x14
mov dst, dstin
+
+#ifdef CONFIG_AS_HAS_MOPS
+alternative_if_not ARM64_HAS_MOPS
+ b .Lno_mops
+alternative_else_nop_endif
+ cpy1 dst, src, count
+ b .Lexitfunc
+.Lno_mops:
+#endif
+
cmp count, #16
/*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -51,6 +51,13 @@
user_stp 9997f, \reg1, \reg2, \ptr, \val
.endm
+ .macro cpy1 dst, src, count
+ .arch_extension mops
+ USER_CPY(9997f, 1, cpyfpwt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 1, cpyfmwt [\dst]!, [\src]!, \count!)
+ USER_CPY(9996f, 1, cpyfewt [\dst]!, [\src]!, \count!)
+ .endm
+
end .req x5
srcin .req x15
SYM_FUNC_START(__arch_copy_to_user)
@@ -61,6 +68,9 @@ SYM_FUNC_START(__arch_copy_to_user)
ret
// Exception fixups
+9996: b.cs 9997f
+ // Registers are in Option A format
+ add dst, dst, count
9997: cmp dst, dstin
b.ne 9998f
// Before being absolutely sure we couldn't copy anything, try harder
Similarly to what was done with the memcpy() routines, make
copy_to_user(), copy_from_user() and clear_user() also use the Armv8.8
FEAT_MOPS instructions.

Both MOPS implementation options (A and B) are supported, including
asymmetric systems. The exception fixup code fixes up the registers
according to the option used.

In case of a fault the routines return precisely how much was not
copied (as required by the comment in include/linux/uaccess.h), as
unprivileged versions of CPY/SET are guaranteed not to have written
past the addresses reported in the GPRs.

The MOPS instructions could possibly be inlined into callers (and
patched to branch to the generic implementation if not detected;
similarly to what x86 does), but as a first step this patch just uses
them in the out-of-line routines.

Signed-off-by: Kristina Martšenko <kristina.martsenko@arm.com>
---
 arch/arm64/include/asm/asm-uaccess.h |  4 ++++
 arch/arm64/lib/clear_user.S          | 25 +++++++++++++++++++++----
 arch/arm64/lib/copy_from_user.S      | 10 ++++++++++
 arch/arm64/lib/copy_template.S       | 10 ++++++++++
 arch/arm64/lib/copy_to_user.S        | 10 ++++++++++
 5 files changed, 55 insertions(+), 4 deletions(-)
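A quick note on the "how much was not copied" contract that the exception
fixups above preserve: the sketch below is a minimal, hypothetical caller
(example_read_from_user() is illustrative only and not part of this patch)
showing how the return value of copy_from_user() is interpreted after a
partial fault.

#include <linux/errno.h>
#include <linux/uaccess.h>

/*
 * Illustrative only: copy_from_user() returns the number of bytes that
 * were NOT copied. With the MOPS-based routine a fault mid-copy still
 * yields an exact remainder, because the unprivileged CPYF*T
 * instructions never write past the addresses that the exception fixup
 * reads back from the GPRs.
 */
static int example_read_from_user(void *dst, const void __user *src,
				  unsigned long len)
{
	unsigned long not_copied = copy_from_user(dst, src, len);

	if (not_copied)
		return -EFAULT;	/* faulted with not_copied bytes left */

	return 0;
}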