Message ID | 20201005181804.1331237-1-jeremy.linton@arm.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Herbert Xu |
Headers | show |
Series | [BUG] arm64: bti: fix BTI to handle local indirect branches | expand |
On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote: > > The AES code uses a 'br x7' as part of a function called by > a macro, that ends up needing a BTI_J as a target. Could we instead just drop the tail call, i.e, replace it with a ret and do a 'bl' after it returns? The indirect call does not really serve a purpose here anyway > Lets > define SYN_CODE_START_LOCAL() for this and replace the > SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block. > > Without this kernels compiled with the aes_neon_bs will OOPS > at boot. > > Bad mode in Synchronous Abort handler detected on CPU1, code 0x34000003 -- BTI > CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 > pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) > pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] > lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] > sp : ffff80001052b730 > x29: ffff80001052b730 x28: 0000000000000001 > x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0 > x25: ffff0001ec8f4000 x24: ffff80001052bc00 > x23: 0000000000000018 x22: 000000000000000a > x21: ffff0001ec5d2100 x20: ffff0001ec8f4080 > x19: ffff0001ec8f4000 x18: 0000000000000004 > x17: 0000000000000000 x16: ffffadb4db85d310 > x15: 0000000000000200 x14: 0000000000000010 > x13: 0000000000000200 x12: ffff0001ec5d2100 > x11: 000000000000000a x10: 0000020000200000 > x9 : ffffadb4db7e7488 x8 : 0000000000000000 > x7 : ffffadb4614fb110 x6 : 0000000000000000 > x5 : ffff80001052bc00 x4 : 0000000000000020 > x3 : 000000000000000a x2 : ffff0001ec5d2100 > x1 : ffff0001ec8f4000 x0 : ffff0001ec8f4000 > Kernel panic - not syncing: bad mode > CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 > Call trace: > dump_backtrace+0x0/0x1c0 > show_stack+0x24/0x30 > dump_stack+0xc0/0x118 > panic+0x144/0x358 > arm64_serror_panic+0x0/0x98 > el1_sync_handler+0x60/0x110 > el1_sync+0xb4/0x180 > aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] > __xts_crypt+0xb0/0x2dc [aes_neon_bs] > xts_encrypt+0x28/0x3c 
[aes_neon_bs] > crypto_skcipher_encrypt+0x50/0x84 > simd_skcipher_encrypt+0xc8/0xe0 > crypto_skcipher_encrypt+0x50/0x84 > test_skcipher_vec_cfg+0x224/0x5f0 > test_skcipher+0xbc/0x120 > alg_test_skcipher+0xa0/0x1b0 > alg_test+0x3dc/0x47c > cryptomgr_test+0x38/0x60 > kthread+0x11c/0x120 > ret_from_fork+0x10/0x18 > SMP: stopping secondary CPUs > Kernel Offset: 0x2db4cb7e0000 from 0xffff800010000000 > PHYS_OFFSET: 0xffffcf2440000000 > CPU features: 0x7e0152,20c02020 > Memory Limit: none > ---[ end Kernel panic - not syncing: bad mode ]--- > > Fixes: commit 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly functions") > Signed-off-by: Jeremy Linton <jeremy.linton@arm.com> > --- > arch/arm64/crypto/aes-neonbs-core.S | 8 ++++---- > arch/arm64/include/asm/linkage.h | 6 +++++- > 2 files changed, 9 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S > index b357164379f6..76de87ee80e1 100644 > --- a/arch/arm64/crypto/aes-neonbs-core.S > +++ b/arch/arm64/crypto/aes-neonbs-core.S > @@ -428,7 +428,7 @@ SYM_FUNC_START(aesbs_convert_key) > SYM_FUNC_END(aesbs_convert_key) > > .align 4 > -SYM_FUNC_START_LOCAL(aesbs_encrypt8) > +SYM_CODE_START_LOCAL(aesbs_encrypt8) > ldr q9, [bskey], #16 // round 0 key > ldr q8, M0SR > ldr q24, SR > @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) > eor v2.16b, v2.16b, v12.16b > eor v5.16b, v5.16b, v12.16b > ret > -SYM_FUNC_END(aesbs_encrypt8) > +SYM_END(aesbs_encrypt8) > > .align 4 > -SYM_FUNC_START_LOCAL(aesbs_decrypt8) > +SYM_CODE_START_LOCAL(aesbs_decrypt8) > lsl x9, rounds, #7 > add bskey, bskey, x9 > > @@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8) > eor v3.16b, v3.16b, v12.16b > eor v5.16b, v5.16b, v12.16b > ret > -SYM_FUNC_END(aesbs_decrypt8) > +SYM_END(aesbs_decrypt8) > > /* > * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, > diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h > index 
ba89a9af820a..92c81ae8ac1e 100644 > --- a/arch/arm64/include/asm/linkage.h > +++ b/arch/arm64/include/asm/linkage.h > @@ -11,7 +11,8 @@ > * set the architecture version to v8.5 we use the hint instruction > * instead. > */ > -#define BTI_C hint 34 ; > +#define BTI_C hint 34 ; > +#define BTI_JC hint 38 ; > > /* > * When using in-kernel BTI we need to ensure that PCS-conformant assembly > @@ -42,6 +43,9 @@ > SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ > BTI_C > > +#define SYM_CODE_START_LOCAL(name) \ > + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ > + BTI_JC > #endif > > /* > -- > 2.25.4 >
Hi, On 10/5/20 1:54 PM, Ard Biesheuvel wrote: > On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote: >> >> The AES code uses a 'br x7' as part of a function called by >> a macro, that ends up needing a BTI_J as a target. > > Could we instead just drop the tail call, i.e, replace it with a ret > and do a 'bl' after it returns? The indirect call does not really > serve a purpose here anyway Yes, that is an option, it adds an extra ret. Which probably doesn't mean much in most cases. I assumed this code was optimized this way because it mattered somewhere. > > >> Lets >> define SYN_CODE_START_LOCAL() for this and replace the >> SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block. ^ That should have read SYM_CODE_START_LOCAL. >> >> Without this kernels compiled with the aes_neon_bs will OOPS >> at boot. >> >> Bad mode in Synchronous Abort handler detected on CPU1, code 0x34000003 -- BTI >> CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 >> pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) >> pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] >> lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] >> sp : ffff80001052b730 >> x29: ffff80001052b730 x28: 0000000000000001 >> x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0 >> x25: ffff0001ec8f4000 x24: ffff80001052bc00 >> x23: 0000000000000018 x22: 000000000000000a >> x21: ffff0001ec5d2100 x20: ffff0001ec8f4080 >> x19: ffff0001ec8f4000 x18: 0000000000000004 >> x17: 0000000000000000 x16: ffffadb4db85d310 >> x15: 0000000000000200 x14: 0000000000000010 >> x13: 0000000000000200 x12: ffff0001ec5d2100 >> x11: 000000000000000a x10: 0000020000200000 >> x9 : ffffadb4db7e7488 x8 : 0000000000000000 >> x7 : ffffadb4614fb110 x6 : 0000000000000000 >> x5 : ffff80001052bc00 x4 : 0000000000000020 >> x3 : 000000000000000a x2 : ffff0001ec5d2100 >> x1 : ffff0001ec8f4000 x0 : ffff0001ec8f4000 >> Kernel panic - not syncing: bad mode >> CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 
5.8.11-300.fc33.aarch64 #1 >> Call trace: >> dump_backtrace+0x0/0x1c0 >> show_stack+0x24/0x30 >> dump_stack+0xc0/0x118 >> panic+0x144/0x358 >> arm64_serror_panic+0x0/0x98 >> el1_sync_handler+0x60/0x110 >> el1_sync+0xb4/0x180 >> aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] >> __xts_crypt+0xb0/0x2dc [aes_neon_bs] >> xts_encrypt+0x28/0x3c [aes_neon_bs] >> crypto_skcipher_encrypt+0x50/0x84 >> simd_skcipher_encrypt+0xc8/0xe0 >> crypto_skcipher_encrypt+0x50/0x84 >> test_skcipher_vec_cfg+0x224/0x5f0 >> test_skcipher+0xbc/0x120 >> alg_test_skcipher+0xa0/0x1b0 >> alg_test+0x3dc/0x47c >> cryptomgr_test+0x38/0x60 >> kthread+0x11c/0x120 >> ret_from_fork+0x10/0x18 >> SMP: stopping secondary CPUs >> Kernel Offset: 0x2db4cb7e0000 from 0xffff800010000000 >> PHYS_OFFSET: 0xffffcf2440000000 >> CPU features: 0x7e0152,20c02020 >> Memory Limit: none >> ---[ end Kernel panic - not syncing: bad mode ]--- >> >> Fixes: commit 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly functions") >> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com> >> --- >> arch/arm64/crypto/aes-neonbs-core.S | 8 ++++---- >> arch/arm64/include/asm/linkage.h | 6 +++++- >> 2 files changed, 9 insertions(+), 5 deletions(-) >> >> diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S >> index b357164379f6..76de87ee80e1 100644 >> --- a/arch/arm64/crypto/aes-neonbs-core.S >> +++ b/arch/arm64/crypto/aes-neonbs-core.S >> @@ -428,7 +428,7 @@ SYM_FUNC_START(aesbs_convert_key) >> SYM_FUNC_END(aesbs_convert_key) >> >> .align 4 >> -SYM_FUNC_START_LOCAL(aesbs_encrypt8) >> +SYM_CODE_START_LOCAL(aesbs_encrypt8) >> ldr q9, [bskey], #16 // round 0 key >> ldr q8, M0SR >> ldr q24, SR >> @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) >> eor v2.16b, v2.16b, v12.16b >> eor v5.16b, v5.16b, v12.16b >> ret >> -SYM_FUNC_END(aesbs_encrypt8) >> +SYM_END(aesbs_encrypt8) >> >> .align 4 >> -SYM_FUNC_START_LOCAL(aesbs_decrypt8) >> +SYM_CODE_START_LOCAL(aesbs_decrypt8) >> lsl x9, rounds, #7 
>> add bskey, bskey, x9 >> >> @@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8) >> eor v3.16b, v3.16b, v12.16b >> eor v5.16b, v5.16b, v12.16b >> ret >> -SYM_FUNC_END(aesbs_decrypt8) >> +SYM_END(aesbs_decrypt8) >> >> /* >> * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, >> diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h >> index ba89a9af820a..92c81ae8ac1e 100644 >> --- a/arch/arm64/include/asm/linkage.h >> +++ b/arch/arm64/include/asm/linkage.h >> @@ -11,7 +11,8 @@ >> * set the architecture version to v8.5 we use the hint instruction >> * instead. >> */ >> -#define BTI_C hint 34 ; >> +#define BTI_C hint 34 ; >> +#define BTI_JC hint 38 ; >> >> /* >> * When using in-kernel BTI we need to ensure that PCS-conformant assembly >> @@ -42,6 +43,9 @@ >> SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ >> BTI_C >> >> +#define SYM_CODE_START_LOCAL(name) \ >> + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ >> + BTI_JC >> #endif >> >> /* >> -- >> 2.25.4 >>
On Mon, Oct 05, 2020 at 01:18:04PM -0500, Jeremy Linton wrote: > The AES code uses a 'br x7' as part of a function called by > a macro, that ends up needing a BTI_J as a target. Lets > define SYN_CODE_START_LOCAL() for this and replace the > SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block. Really what the subject here should say is that this code is not a standard function and therefore should not be annotated as such - it's wrong with or without BTI, BTI just makes it very apparent. It'd also be better to split the change in linkage.h out into a separate patch, that'd make things clearer for review. > CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 > pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) > pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] > lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] > sp : ffff80001052b730 > x29: ffff80001052b730 x28: 0000000000000001 > x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0 Please think hard before including complete backtraces in upstream reports, they are very large and contain almost no useful information relative to their size so often obscure the relevant content in your message. If part of the backtrace is usefully illustrative (it often is for search engines if nothing else) then it's usually better to pull out the relevant sections. > -SYM_FUNC_START_LOCAL(aesbs_encrypt8) > +SYM_CODE_START_LOCAL(aesbs_encrypt8) > ldr q9, [bskey], #16 // round 0 key > ldr q8, M0SR > ldr q24, SR > @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) > eor v2.16b, v2.16b, v12.16b > eor v5.16b, v5.16b, v12.16b > ret > -SYM_FUNC_END(aesbs_encrypt8) > +SYM_END(aesbs_encrypt8) This should be SYM_CODE_END() to match the opening. However... 
> * When using in-kernel BTI we need to ensure that PCS-conformant assembly > @@ -42,6 +43,9 @@ > SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ > BTI_C > > +#define SYM_CODE_START_LOCAL(name) \ > + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ > + BTI_JC ...this is going to cause problems, SYM_CODE means that we should assemble *exactly* what was written since it's some non-standard thing - we use it for the vectors table for example. Looking at the code it's not 100% clear that the best approach here isn't just to change the call to a regular function call, this isn't a fast path or anything as far as I can see so it's unclear to me why we need to tail call. Failing that I think we need an annotation for tail called functions, that'd need to be a new thing as I am not seeing anything appropriate in the current generic annotations.
Hi, On 10/5/20 2:59 PM, Mark Brown wrote: > On Mon, Oct 05, 2020 at 01:18:04PM -0500, Jeremy Linton wrote: > >> The AES code uses a 'br x7' as part of a function called by >> a macro, that ends up needing a BTI_J as a target. Lets >> define SYN_CODE_START_LOCAL() for this and replace the >> SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block. > > Really what the subject here should say is that this code is not a > standard function and therefore should not be annotated as such - it's > wrong with or without BTI, BTI just makes it very apparent. It'd also > be better to split the change in linkage.h out into a separate patch, > that'd make things clearer for review. > >> CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 >> pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) >> pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] >> lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] >> sp : ffff80001052b730 >> x29: ffff80001052b730 x28: 0000000000000001 >> x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0 > > Please think hard before including complete backtraces in upstream > reports, they are very large and contain almost no useful information > relative to their size so often obscure the relevant content in your > message. If part of the backtrace is usefully illustrative (it often is > for search engines if nothing else) then it's usually better to pull out > the relevant sections. > >> -SYM_FUNC_START_LOCAL(aesbs_encrypt8) >> +SYM_CODE_START_LOCAL(aesbs_encrypt8) >> ldr q9, [bskey], #16 // round 0 key >> ldr q8, M0SR >> ldr q24, SR >> @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) >> eor v2.16b, v2.16b, v12.16b >> eor v5.16b, v5.16b, v12.16b >> ret >> -SYM_FUNC_END(aesbs_encrypt8) >> +SYM_END(aesbs_encrypt8) > > This should be SYM_CODE_END() to match the opening. However... 
> >> * When using in-kernel BTI we need to ensure that PCS-conformant assembly >> @@ -42,6 +43,9 @@ >> SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ >> BTI_C >> >> +#define SYM_CODE_START_LOCAL(name) \ >> + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ >> + BTI_JC > > ...this is going to cause problems, SYM_CODE means that we should > assemble *exactly* what was written since it's some non-standard thing - > we use it for the vectors table for example. Looking at the code it's > not 100% clear that the best approach here isn't just to change the call > to a regular function call, this isn't a fast path or anything as far as > I can see so it's unclear to me why we need to tail call. Well, for some workloads it could be, AFAIK. OTOH, Ard mentioned dumping the tail call too, and I think that is pretty reasonable. So it looks like that is a better plan since it also avoids all this SYM_ flailing. > > Failing that I think we need an annotation for tail called functions, > that'd need to be a new thing as I am not seeing anything appropriate in > the current generic annotations. >
On Mon, Oct 05, 2020 at 03:10:42PM -0500, Jeremy Linton wrote: > On 10/5/20 2:59 PM, Mark Brown wrote: > > ...this is going to cause problems, SYM_CODE means that we should > > assemble *exactly* what was written since it's some non-standard thing - > > we use it for the vectors table for example. Looking at the code it's > > not 100% clear that the best approach here isn't just to change the call > > to a regular function call, this isn't a fast path or anything as far as > > I can see so it's unclear to me why we need to tail call. > Well, for some workloads it could be, AFAIK. OTOH, Ard mentioned dumping the > tail call too, and I think that is pretty reasonable. So it looks like that > is a better plan since it also avoids all this SYM_ flailing. Yeah, I think that's the easiest thing all round.
Hi Jeremy, Thank you for the patch! Yet something to improve: [auto build test ERROR on arm64/for-next/core] [also build test ERROR on soc/for-next arm/for-next kvmarm/next v5.9-rc8 next-20201002] [cannot apply to xlnx/master] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Jeremy-Linton/arm64-bti-fix-BTI-to-handle-local-indirect-branches/20201006-021958 base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core config: arm64-allyesconfig (attached as .config) compiler: aarch64-linux-gcc (GCC) 9.3.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/0day-ci/linux/commit/37211f8bd05c9ebcede89bb5c371e34920355d4f git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Jeremy-Linton/arm64-bti-fix-BTI-to-handle-local-indirect-branches/20201006-021958 git checkout 37211f8bd05c9ebcede89bb5c371e34920355d4f # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): >> arch/arm64/crypto/aes-neonbs-core.S:491:23: error: macro "SYM_END" requires 2 arguments, but only 1 given 491 | SYM_END(aesbs_encrypt8) | ^ In file included from arch/arm64/crypto/aes-neonbs-core.S:17: include/linux/linkage.h:166: note: macro "SYM_END" defined here 166 | #define SYM_END(name, sym_type) \ | arch/arm64/crypto/aes-neonbs-core.S:556:23: error: macro "SYM_END" requires 2 arguments, but only 1 given 556 | SYM_END(aesbs_decrypt8) | ^ In file included from arch/arm64/crypto/aes-neonbs-core.S:17: include/linux/linkage.h:166: 
note: macro "SYM_END" defined here 166 | #define SYM_END(name, sym_type) \ | vim +/SYM_END +491 arch/arm64/crypto/aes-neonbs-core.S 429 430 .align 4 431 SYM_CODE_START_LOCAL(aesbs_encrypt8) 432 ldr q9, [bskey], #16 // round 0 key 433 ldr q8, M0SR 434 ldr q24, SR 435 436 eor v10.16b, v0.16b, v9.16b // xor with round0 key 437 eor v11.16b, v1.16b, v9.16b 438 tbl v0.16b, {v10.16b}, v8.16b 439 eor v12.16b, v2.16b, v9.16b 440 tbl v1.16b, {v11.16b}, v8.16b 441 eor v13.16b, v3.16b, v9.16b 442 tbl v2.16b, {v12.16b}, v8.16b 443 eor v14.16b, v4.16b, v9.16b 444 tbl v3.16b, {v13.16b}, v8.16b 445 eor v15.16b, v5.16b, v9.16b 446 tbl v4.16b, {v14.16b}, v8.16b 447 eor v10.16b, v6.16b, v9.16b 448 tbl v5.16b, {v15.16b}, v8.16b 449 eor v11.16b, v7.16b, v9.16b 450 tbl v6.16b, {v10.16b}, v8.16b 451 tbl v7.16b, {v11.16b}, v8.16b 452 453 bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 454 455 sub rounds, rounds, #1 456 b .Lenc_sbox 457 458 .Lenc_loop: 459 shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 460 .Lenc_sbox: 461 sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ 462 v13, v14, v15 463 subs rounds, rounds, #1 464 b.cc .Lenc_done 465 466 enc_next_rk 467 468 mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ 469 v13, v14, v15 470 471 add_round_key v0, v1, v2, v3, v4, v5, v6, v7 472 473 b.ne .Lenc_loop 474 ldr q24, SRM0 475 b .Lenc_loop 476 477 .Lenc_done: 478 ldr q12, [bskey] // last round key 479 480 bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 481 482 eor v0.16b, v0.16b, v12.16b 483 eor v1.16b, v1.16b, v12.16b 484 eor v4.16b, v4.16b, v12.16b 485 eor v6.16b, v6.16b, v12.16b 486 eor v3.16b, v3.16b, v12.16b 487 eor v7.16b, v7.16b, v12.16b 488 eor v2.16b, v2.16b, v12.16b 489 eor v5.16b, v5.16b, v12.16b 490 ret > 491 SYM_END(aesbs_encrypt8) 492 493 .align 4 494 SYM_CODE_START_LOCAL(aesbs_decrypt8) 495 lsl x9, rounds, #7 496 add bskey, bskey, x9 497 498 ldr q9, [bskey, #-112]! 
// round 0 key 499 ldr q8, M0ISR 500 ldr q24, ISR 501 502 eor v10.16b, v0.16b, v9.16b // xor with round0 key 503 eor v11.16b, v1.16b, v9.16b 504 tbl v0.16b, {v10.16b}, v8.16b 505 eor v12.16b, v2.16b, v9.16b 506 tbl v1.16b, {v11.16b}, v8.16b 507 eor v13.16b, v3.16b, v9.16b 508 tbl v2.16b, {v12.16b}, v8.16b 509 eor v14.16b, v4.16b, v9.16b 510 tbl v3.16b, {v13.16b}, v8.16b 511 eor v15.16b, v5.16b, v9.16b 512 tbl v4.16b, {v14.16b}, v8.16b 513 eor v10.16b, v6.16b, v9.16b 514 tbl v5.16b, {v15.16b}, v8.16b 515 eor v11.16b, v7.16b, v9.16b 516 tbl v6.16b, {v10.16b}, v8.16b 517 tbl v7.16b, {v11.16b}, v8.16b 518 519 bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 520 521 sub rounds, rounds, #1 522 b .Ldec_sbox 523 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Mon, Oct 05, 2020 at 02:24:47PM -0500, Jeremy Linton wrote: > Hi, > > On 10/5/20 1:54 PM, Ard Biesheuvel wrote: > >On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote: > >> > >>The AES code uses a 'br x7' as part of a function called by > >>a macro, that ends up needing a BTI_J as a target. > > > >Could we instead just drop the tail call, i.e, replace it with a ret > >and do a 'bl' after it returns? The indirect call does not really > >serve a purpose here anyway > > Yes, that is an option, it adds an extra ret. Which probably doesn't mean > much in most cases. I assumed this code was optimized this way because it > mattered somewhere. Since this really does seem to be a tail-call and since x16 and x17 appear to be otherwise unused here, can we not just use x16 or x17 instead of x7? This relies on there being no other calls to veneered functions in the mix, but this code is all in a single section so that shouldn't be a concern. Due to the magic status of x16 and x17 in br instructions, the resulting jump should be compatible with BTI c. I think this matches how the compiler should typically compile tail-calls. Cheers ---Dave
On Tue, 6 Oct 2020 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote: > > On Mon, Oct 05, 2020 at 02:24:47PM -0500, Jeremy Linton wrote: > > Hi, > > > > On 10/5/20 1:54 PM, Ard Biesheuvel wrote: > > >On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote: > > >> > > >>The AES code uses a 'br x7' as part of a function called by > > >>a macro, that ends up needing a BTI_J as a target. > > > > > >Could we instead just drop the tail call, i.e, replace it with a ret > > >and do a 'bl' after it returns? The indirect call does not really > > >serve a purpose here anyway > > > > Yes, that is an option, it adds an extra ret. Which probably doesn't mean > > much in most cases. I assumed this code was optimized this way because it > > mattered somewhere. > > Since this really does seem to be a tail-call and since x16 and x17 > appear to be otherwise unused here, can we not just use x16 or x17 > instead of x7? > > This relies on there being no other calls to veneered functions in the > mix, but this code is all in a single section so that shouldn't be a > concern. > > Due to the magic status of x16 and x17 in br instructions, the resulting > jump should be compatible with BTI c. I think this matches how the > compiler should typically compile tail-calls. > Ah, excellent point. That is definitely the cleanest fix.
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index b357164379f6..76de87ee80e1 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -428,7 +428,7 @@ SYM_FUNC_START(aesbs_convert_key) SYM_FUNC_END(aesbs_convert_key) .align 4 -SYM_FUNC_START_LOCAL(aesbs_encrypt8) +SYM_CODE_START_LOCAL(aesbs_encrypt8) ldr q9, [bskey], #16 // round 0 key ldr q8, M0SR ldr q24, SR @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8) eor v2.16b, v2.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -SYM_FUNC_END(aesbs_encrypt8) +SYM_END(aesbs_encrypt8) .align 4 -SYM_FUNC_START_LOCAL(aesbs_decrypt8) +SYM_CODE_START_LOCAL(aesbs_decrypt8) lsl x9, rounds, #7 add bskey, bskey, x9 @@ -553,7 +553,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8) eor v3.16b, v3.16b, v12.16b eor v5.16b, v5.16b, v12.16b ret -SYM_FUNC_END(aesbs_decrypt8) +SYM_END(aesbs_decrypt8) /* * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index ba89a9af820a..92c81ae8ac1e 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -11,7 +11,8 @@ * set the architecture version to v8.5 we use the hint instruction * instead. */ -#define BTI_C hint 34 ; +#define BTI_C hint 34 ; +#define BTI_JC hint 38 ; /* * When using in-kernel BTI we need to ensure that PCS-conformant assembly @@ -42,6 +43,9 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ BTI_C +#define SYM_CODE_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + BTI_JC #endif /*
The AES code uses a 'br x7' as part of a function called by a macro, that ends up needing a BTI_J as a target. Let's define SYM_CODE_START_LOCAL() for this and replace the SYM_FUNC_START_LOCAL with SYM_CODE_START_LOCAL in the AES block. Without this kernels compiled with the aes_neon_bs will OOPS at boot. Bad mode in Synchronous Abort handler detected on CPU1, code 0x34000003 -- BTI CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-) pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs] sp : ffff80001052b730 x29: ffff80001052b730 x28: 0000000000000001 x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0 x25: ffff0001ec8f4000 x24: ffff80001052bc00 x23: 0000000000000018 x22: 000000000000000a x21: ffff0001ec5d2100 x20: ffff0001ec8f4080 x19: ffff0001ec8f4000 x18: 0000000000000004 x17: 0000000000000000 x16: ffffadb4db85d310 x15: 0000000000000200 x14: 0000000000000010 x13: 0000000000000200 x12: ffff0001ec5d2100 x11: 000000000000000a x10: 0000020000200000 x9 : ffffadb4db7e7488 x8 : 0000000000000000 x7 : ffffadb4614fb110 x6 : 0000000000000000 x5 : ffff80001052bc00 x4 : 0000000000000020 x3 : 000000000000000a x2 : ffff0001ec5d2100 x1 : ffff0001ec8f4000 x0 : ffff0001ec8f4000 Kernel panic - not syncing: bad mode CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1 Call trace: dump_backtrace+0x0/0x1c0 show_stack+0x24/0x30 dump_stack+0xc0/0x118 panic+0x144/0x358 arm64_serror_panic+0x0/0x98 el1_sync_handler+0x60/0x110 el1_sync+0xb4/0x180 aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs] __xts_crypt+0xb0/0x2dc [aes_neon_bs] xts_encrypt+0x28/0x3c [aes_neon_bs] crypto_skcipher_encrypt+0x50/0x84 simd_skcipher_encrypt+0xc8/0xe0 crypto_skcipher_encrypt+0x50/0x84 test_skcipher_vec_cfg+0x224/0x5f0 test_skcipher+0xbc/0x120 alg_test_skcipher+0xa0/0x1b0 alg_test+0x3dc/0x47c cryptomgr_test+0x38/0x60 kthread+0x11c/0x120 ret_from_fork+0x10/0x18 SMP: stopping secondary CPUs Kernel 
Offset: 0x2db4cb7e0000 from 0xffff800010000000 PHYS_OFFSET: 0xffffcf2440000000 CPU features: 0x7e0152,20c02020 Memory Limit: none ---[ end Kernel panic - not syncing: bad mode ]--- Fixes: commit 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly functions") Signed-off-by: Jeremy Linton <jeremy.linton@arm.com> --- arch/arm64/crypto/aes-neonbs-core.S | 8 ++++---- arch/arm64/include/asm/linkage.h | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-)