Message ID | 20211018190858.2119209-5-broonie@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arm64/sme: Initial support for the Scalable Matrix Extension | expand |
On Mon, Oct 18, 2021 at 08:08:20PM +0100, Mark Brown wrote: > SME introduces streaming SVE mode in which FFR is not present and the > instructions for accessing it UNDEF. In preparation for handling this > update the low level SVE state access functions to take a flag specifying > if FFR should be handled. When saving the register state we store a zero > for FFR to guard against uninitialized data being read. No behaviour change > should be introduced by this patch. > > Signed-off-by: Mark Brown <broonie@kernel.org> > --- > arch/arm64/include/asm/fpsimd.h | 6 +++--- > arch/arm64/include/asm/fpsimdmacros.h | 20 ++++++++++++++------ > arch/arm64/kernel/entry-fpsimd.S | 17 +++++++++++------ > arch/arm64/kernel/fpsimd.c | 10 ++++++---- > arch/arm64/kvm/hyp/fpsimd.S | 6 ++++-- > 5 files changed, 38 insertions(+), 21 deletions(-) > > diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h > index 917ecc301d1d..7f8a44a9a5e6 100644 > --- a/arch/arm64/include/asm/fpsimd.h > +++ b/arch/arm64/include/asm/fpsimd.h > @@ -65,10 +65,10 @@ static inline void *sve_pffr(struct thread_struct *thread) > return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); > } > > -extern void sve_save_state(void *state, u32 *pfpsr); > +extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); > extern void sve_load_state(void const *state, u32 const *pfpsr, > - unsigned long vq_minus_1); > -extern void sve_flush_live(unsigned long vq_minus_1); > + int restore_ffr, unsigned long vq_minus_1); > +extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); > extern unsigned int sve_get_vl(void); > extern void sve_set_vq(unsigned long vq_minus_1); > > diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h > index 00a2c0b69c2b..84d8cb7b07fa 100644 > --- a/arch/arm64/include/asm/fpsimdmacros.h > +++ b/arch/arm64/include/asm/fpsimdmacros.h > @@ -217,28 +217,36 @@ > .macro sve_flush_z > _for n, 0, 31, _sve_flush_z \n > 
.endm > -.macro sve_flush_p_ffr > +.macro sve_flush_p > _for n, 0, 15, _sve_pfalse \n > +.endm > +.macro sve_flush_ffr > _sve_wrffr 0 > .endm > > -.macro sve_save nxbase, xpfpsr, nxtmp > +.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp > _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 > _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 > + cbz \save_ffr, 921f > _sve_rdffr 0 > _sve_str_p 0, \nxbase > _sve_ldr_p 0, \nxbase, -16 > - > + b 922f > +921: > + str xzr, [x\nxbase, #0] // Zero out FFR You can drop the '#0' here. > @@ -72,12 +74,15 @@ SYM_FUNC_END(sve_set_vq) > * VQ must already be configured by caller, any further updates of VQ > * will need to ensure that the register state remains valid. > * > - * x0 = VQ - 1 > + * x0 = include FFR? > + * x1 = VQ - 1 > */ > SYM_FUNC_START(sve_flush_live) > - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state > + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state > sve_flush_z > -1: sve_flush_p_ffr > +1: cbz x0, 2f > + sve_flush_p > +2: sve_flush_ffr > ret I'm confused by this -- x0 seems to control whether or not we flush the predicate registers rather then ffr. 
> SYM_FUNC_END(sve_flush_live) > > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c > index 0f6df1ece618..3d5d243c3f1c 100644 > --- a/arch/arm64/kernel/fpsimd.c > +++ b/arch/arm64/kernel/fpsimd.c > @@ -289,7 +289,7 @@ static void task_fpsimd_load(void) > > if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) > sve_load_state(sve_pffr(¤t->thread), > - ¤t->thread.uw.fpsimd_state.fpsr, > + ¤t->thread.uw.fpsimd_state.fpsr, true, > sve_vq_from_vl(current->thread.sve_vl) - 1); > else > fpsimd_load_state(¤t->thread.uw.fpsimd_state); > @@ -325,7 +325,7 @@ static void fpsimd_save(void) > > sve_save_state((char *)last->sve_state + > sve_ffr_offset(last->sve_vl), > - &last->st->fpsr); > + &last->st->fpsr, true); > } else { > fpsimd_save_state(last->st); > } > @@ -962,7 +962,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) > unsigned long vq_minus_one = > sve_vq_from_vl(current->thread.sve_vl) - 1; > sve_set_vq(vq_minus_one); > - sve_flush_live(vq_minus_one); > + sve_flush_live(true, vq_minus_one); What does the pcs say about passing bools in registers? Can we guarantee that false is a 64-bit zero? Will
On Tue, Oct 19, 2021 at 11:14:47AM +0100, Will Deacon wrote: > > -1: sve_flush_p_ffr > > +1: cbz x0, 2f > > + sve_flush_p > > +2: sve_flush_ffr > > ret > I'm confused by this -- x0 seems to control whether or not we flush the > predicate registers rather than ffr. Yes, that's buggy - it got broken with rebasing for skipping the Z register flush. Thanks for spotting, sorry about that.
On Tue, Oct 19, 2021 at 11:14:47AM +0100, Will Deacon wrote: > On Mon, Oct 18, 2021 at 08:08:20PM +0100, Mark Brown wrote: > > SYM_FUNC_START(sve_flush_live) > > - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state > > + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state > > sve_flush_z > > -1: sve_flush_p_ffr > > +1: cbz x0, 2f > > + sve_flush_p > > +2: sve_flush_ffr > > ret > > @@ -962,7 +962,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) > > unsigned long vq_minus_one = > > sve_vq_from_vl(current->thread.sve_vl) - 1; > > sve_set_vq(vq_minus_one); > > - sve_flush_live(vq_minus_one); > > + sve_flush_live(true, vq_minus_one); > > What does the pcs say about passing bools in registers? Can we guarantee > that false is a 64-bit zero? Per usual rules, bits [63:8] can be arbitrary -- AAPCS64 leaves it to the callee to extend values, with the upper bits being arbitrary, and it maps _Bool/bool to unsigned char, which covers bits [7:0]. So a bool false in a register is not guaranteed to be a 64-bit zero. But since it *is* guarnateed to be either 0 or 1, we can use TBZ/TBNZ instead of CBZ/CBNZ. Either that, or extend it to a wider type in the function prototype. The test below shows clang and GCC both agree with that (though this old GCC seems to do unnecessary zero extension as a caller): | [mark@gravadlaks:~]% cat bool.c | #include <stdbool.h> | | void callee_bool(bool b); | | void callee_unsigned_int(unsigned int i); | | void caller_unsigned_long(unsigned long l) | { | unsigned long tmp = l & 0xffffffff; | | if (tmp) | callee_unsigned_int(tmp); | else | callee_bool(tmp); | } | | unsigned long bool_to_unsigned_long(bool b) | { | return b; | } | [mark@gravadlaks:~]% gcc --version | gcc (Debian 8.3.0-6) 8.3.0 | Copyright (C) 2018 Free Software Foundation, Inc. | This is free software; see the source for copying conditions. There is NO | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
| | [mark@gravadlaks:~]% gcc -c bool.c -O3 | [mark@gravadlaks:~]% objdump -d bool.o | | bool.o: file format elf64-littleaarch64 | | | Disassembly of section .text: | | 0000000000000000 <caller_unsigned_long>: | 0: 34000040 cbz w0, 8 <caller_unsigned_long+0x8> | 4: 14000000 b 0 <callee_unsigned_int> | 8: 52800000 mov w0, #0x0 // #0 | c: 14000000 b 0 <callee_bool> | | 0000000000000010 <bool_to_unsigned_long>: | 10: 92401c00 and x0, x0, #0xff | 14: d65f03c0 ret | [mark@gravadlaks:~]% clang --version | clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final) | Target: aarch64-unknown-linux-gnu | Thread model: posix | InstalledDir: /usr/bin | [mark@gravadlaks:~]% clang -c bool.c -O3 | [mark@gravadlaks:~]% objdump -d bool.o | | bool.o: file format elf64-littleaarch64 | | | Disassembly of section .text: | | 0000000000000000 <caller_unsigned_long>: | 0: 34000040 cbz w0, 8 <caller_unsigned_long+0x8> | 4: 14000000 b 0 <callee_unsigned_int> | 8: 14000000 b 0 <callee_bool> | | 000000000000000c <bool_to_unsigned_long>: | c: 92400000 and x0, x0, #0x1 | 10: d65f03c0 ret Thanks, Mark.
On Tue, Oct 19, 2021 at 03:39:11PM +0100, Mark Rutland wrote: > On Tue, Oct 19, 2021 at 11:14:47AM +0100, Will Deacon wrote: > > > + sve_flush_live(true, vq_minus_one); > > What does the pcs say about passing bools in registers? Can we guarantee > > that false is a 64-bit zero? > Per usual rules, bits [63:8] can be arbitrary -- AAPCS64 leaves it to the callee > to extend values, with the upper bits being arbitrary, and it maps _Bool/bool > to unsigned char, which covers bits [7:0]. > So a bool false in a register is not guaranteed to be a 64-bit zero. But > since it *is* guaranteed to be either 0 or 1, we can use TBZ/TBNZ > instead of CBZ/CBNZ. Either that, or extend it to a wider type in the > function prototype. I'll change it to tbz - if we use a wider type then people will notice using true/false with it in the code.
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 917ecc301d1d..7f8a44a9a5e6 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -65,10 +65,10 @@ static inline void *sve_pffr(struct thread_struct *thread) return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); } -extern void sve_save_state(void *state, u32 *pfpsr); +extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, - unsigned long vq_minus_1); -extern void sve_flush_live(unsigned long vq_minus_1); + int restore_ffr, unsigned long vq_minus_1); +extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 00a2c0b69c2b..84d8cb7b07fa 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -217,28 +217,36 @@ .macro sve_flush_z _for n, 0, 31, _sve_flush_z \n .endm -.macro sve_flush_p_ffr +.macro sve_flush_p _for n, 0, 15, _sve_pfalse \n +.endm +.macro sve_flush_ffr _sve_wrffr 0 .endm -.macro sve_save nxbase, xpfpsr, nxtmp +.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 + cbz \save_ffr, 921f _sve_rdffr 0 _sve_str_p 0, \nxbase _sve_ldr_p 0, \nxbase, -16 - + b 922f +921: + str xzr, [x\nxbase, #0] // Zero out FFR +922: mrs x\nxtmp, fpsr str w\nxtmp, [\xpfpsr] mrs x\nxtmp, fpcr str w\nxtmp, [\xpfpsr, #4] .endm -.macro __sve_load nxbase, xpfpsr, nxtmp +.macro __sve_load nxbase, xpfpsr, restore_ffr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 + cbz \restore_ffr, 921f _sve_ldr_p 0, \nxbase _sve_wrffr 0 +921: _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16 ldr w\nxtmp, [\xpfpsr] @@ -247,7 +255,7 @@ msr fpcr, x\nxtmp .endm -.macro sve_load 
nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 +.macro sve_load nxbase, xpfpsr, restore_ffr, xvqminus1, nxtmp, xtmp2 sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 - __sve_load \nxbase, \xpfpsr, \nxtmp + __sve_load \nxbase, \xpfpsr, \restore_ffr, \nxtmp .endm diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index afbf7dc47e1d..13c27465bfa8 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -38,9 +38,10 @@ SYM_FUNC_END(fpsimd_load_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR + * x2 - Save FFR if non-zero */ SYM_FUNC_START(sve_save_state) - sve_save 0, x1, 2 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(sve_save_state) @@ -49,10 +50,11 @@ SYM_FUNC_END(sve_save_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR - * x2 - VQ-1 + * x2 - Restore FFR if non-zero + * x3 - VQ-1 */ SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, 3, x4 + sve_load 0, x1, x2, x3, 4, x5 ret SYM_FUNC_END(sve_load_state) @@ -72,12 +74,15 @@ SYM_FUNC_END(sve_set_vq) * VQ must already be configured by caller, any further updates of VQ * will need to ensure that the register state remains valid. * - * x0 = VQ - 1 + * x0 = include FFR? 
+ * x1 = VQ - 1 */ SYM_FUNC_START(sve_flush_live) - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state sve_flush_z -1: sve_flush_p_ffr +1: cbz x0, 2f + sve_flush_p +2: sve_flush_ffr ret SYM_FUNC_END(sve_flush_live) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0f6df1ece618..3d5d243c3f1c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static void task_fpsimd_load(void) if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, + ¤t->thread.uw.fpsimd_state.fpsr, true, sve_vq_from_vl(current->thread.sve_vl) - 1); else fpsimd_load_state(¤t->thread.uw.fpsimd_state); @@ -325,7 +325,7 @@ static void fpsimd_save(void) sve_save_state((char *)last->sve_state + sve_ffr_offset(last->sve_vl), - &last->st->fpsr); + &last->st->fpsr, true); } else { fpsimd_save_state(last->st); } @@ -962,7 +962,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) unsigned long vq_minus_one = sve_vq_from_vl(current->thread.sve_vl) - 1; sve_set_vq(vq_minus_one); - sve_flush_live(vq_minus_one); + sve_flush_live(true, vq_minus_one); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); @@ -1356,7 +1356,8 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); sve_save_state(sve_state + sve_ffr_offset(sve_max_vl), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr); + &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1382,6 +1383,7 @@ void __efi_fpsimd_end(void) sve_load_state(sve_state + sve_ffr_offset(sve_max_vl), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true, sve_vq_from_vl(sve_get_vl()) - 1); __this_cpu_write(efi_sve_state_used, false); diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 3c635929771a..1bb3b04b84e6 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ 
b/arch/arm64/kvm/hyp/fpsimd.S @@ -21,11 +21,13 @@ SYM_FUNC_START(__fpsimd_restore_state) SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) - __sve_load 0, x1, 2 + mov x2, #1 + __sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) SYM_FUNC_START(__sve_save_state) - sve_save 0, x1, 2 + mov x2, #1 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(__sve_save_state)
SME introduces streaming SVE mode in which FFR is not present and the instructions for accessing it UNDEF. In preparation for handling this update the low level SVE state access functions to take a flag specifying if FFR should be handled. When saving the register state we store a zero for FFR to guard against uninitialized data being read. No behaviour change should be introduced by this patch. Signed-off-by: Mark Brown <broonie@kernel.org> --- arch/arm64/include/asm/fpsimd.h | 6 +++--- arch/arm64/include/asm/fpsimdmacros.h | 20 ++++++++++++++------ arch/arm64/kernel/entry-fpsimd.S | 17 +++++++++++------ arch/arm64/kernel/fpsimd.c | 10 ++++++---- arch/arm64/kvm/hyp/fpsimd.S | 6 ++++-- 5 files changed, 38 insertions(+), 21 deletions(-)