
[BUG] arm64: bti: fix BTI to handle local indirect branches

Message ID: 20201005181804.1331237-1-jeremy.linton@arm.com (mailing list archive)
State: Not Applicable
Delegated to: Herbert Xu
Series: [BUG] arm64: bti: fix BTI to handle local indirect branches

Commit Message

Jeremy Linton Oct. 5, 2020, 6:18 p.m. UTC
The AES code uses a 'br x7' as part of a function called by
a macro, which ends up needing a BTI_J as a target. Let's
define SYM_CODE_START_LOCAL() for this and replace the
SYM_FUNC_START_LOCAL with a SYM_CODE_START_LOCAL in the AES block.

Without this, kernels compiled with aes_neon_bs will OOPS
at boot.
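
For context, the BTI landing-pad rules behind this are roughly as
follows (illustrative only; the hint encodings are what the kernel
macros emit so that pre-BTI assemblers still accept them):

	hint	34	// "bti c":  valid target for blr (any reg) and br x16/x17
	hint	36	// "bti j":  valid target for br via any register
	hint	38	// "bti jc": valid target for both kinds of branch
	br	x7	// so this branch needs "bti j" or "bti jc" at its destination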

  Bad mode in Synchronous Abort handler detected on CPU1, code 0x34000003 -- BTI
  CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1
  pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-)
  pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
  lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs]
  sp : ffff80001052b730
  x29: ffff80001052b730 x28: 0000000000000001
  x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0
  x25: ffff0001ec8f4000 x24: ffff80001052bc00
  x23: 0000000000000018 x22: 000000000000000a
  x21: ffff0001ec5d2100 x20: ffff0001ec8f4080
  x19: ffff0001ec8f4000 x18: 0000000000000004
  x17: 0000000000000000 x16: ffffadb4db85d310
  x15: 0000000000000200 x14: 0000000000000010
  x13: 0000000000000200 x12: ffff0001ec5d2100
  x11: 000000000000000a x10: 0000020000200000
  x9 : ffffadb4db7e7488 x8 : 0000000000000000
  x7 : ffffadb4614fb110 x6 : 0000000000000000
  x5 : ffff80001052bc00 x4 : 0000000000000020
  x3 : 000000000000000a x2 : ffff0001ec5d2100
  x1 : ffff0001ec8f4000 x0 : ffff0001ec8f4000
  Kernel panic - not syncing: bad mode
  CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1
  Call trace:
   dump_backtrace+0x0/0x1c0
   show_stack+0x24/0x30
   dump_stack+0xc0/0x118
   panic+0x144/0x358
   arm64_serror_panic+0x0/0x98
   el1_sync_handler+0x60/0x110
   el1_sync+0xb4/0x180
   aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
   __xts_crypt+0xb0/0x2dc [aes_neon_bs]
   xts_encrypt+0x28/0x3c [aes_neon_bs]
  crypto_skcipher_encrypt+0x50/0x84
  simd_skcipher_encrypt+0xc8/0xe0
  crypto_skcipher_encrypt+0x50/0x84
  test_skcipher_vec_cfg+0x224/0x5f0
  test_skcipher+0xbc/0x120
  alg_test_skcipher+0xa0/0x1b0
  alg_test+0x3dc/0x47c
  cryptomgr_test+0x38/0x60
  kthread+0x11c/0x120
  ret_from_fork+0x10/0x18
 SMP: stopping secondary CPUs
 Kernel Offset: 0x2db4cb7e0000 from 0xffff800010000000
 PHYS_OFFSET: 0xffffcf2440000000
 CPU features: 0x7e0152,20c02020
 Memory Limit: none
 ---[ end Kernel panic - not syncing: bad mode ]---

Fixes: 0e89640b640d ("crypto: arm64 - Use modern annotations for assembly functions")
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
---
 arch/arm64/crypto/aes-neonbs-core.S | 8 ++++----
 arch/arm64/include/asm/linkage.h    | 6 +++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

Comments

Ard Biesheuvel Oct. 5, 2020, 6:54 p.m. UTC | #1
On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote:
>
> The AES code uses a 'br x7' as part of a function called by
> a macro, that ends up needing a BTI_J as a target.

Could we instead just drop the tail call, i.e., replace it with a ret
and do a 'bl' after it returns? The indirect call does not really
serve a purpose here anyway.
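
For illustration, that restructuring might look roughly like the sketch
below; how the target currently reaches x7 and the surrounding macro
code are assumptions here, not the exact contents of aes-neonbs-core.S:

	// current shape: the helper is entered via an indirect tail
	// call, so its entry needs a "bti j"-capable landing pad
	br	x7			// x7 = aesbs_encrypt8 or aesbs_decrypt8

	// suggested shape: the routine that now ends in 'br x7' simply
	// returns...
	ret
	// ...and the macro body then makes a direct call, so the plain
	// "bti c" emitted by SYM_FUNC_START_LOCAL() remains sufficient
	bl	aesbs_encrypt8		// or aesbs_decrypt8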


> [...]
Jeremy Linton Oct. 5, 2020, 7:24 p.m. UTC | #2
Hi,

On 10/5/20 1:54 PM, Ard Biesheuvel wrote:
> On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote:
>>
>> The AES code uses a 'br x7' as part of a function called by
>> a macro, that ends up needing a BTI_J as a target.
> 
> Could we instead just drop the tail call, i.e, replace it with a ret
> and do a 'bl' after it returns? The indirect call does not really
> serve a purpose here anyway

Yes, that is an option; it adds an extra ret, which probably doesn't
mean much in most cases. I assumed this code was optimized this way
because it mattered somewhere.

> 
> 
>> Lets
>> define SYN_CODE_START_LOCAL() for this and replace the
>> SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block.
                                ^ That should have read 
SYM_CODE_START_LOCAL.
>> [...]
Mark Brown Oct. 5, 2020, 7:59 p.m. UTC | #3
On Mon, Oct 05, 2020 at 01:18:04PM -0500, Jeremy Linton wrote:

> The AES code uses a 'br x7' as part of a function called by
> a macro, that ends up needing a BTI_J as a target. Lets
> define SYN_CODE_START_LOCAL() for this and replace the
> SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block.

Really what the subject here should say is that this code is not a
standard function and therefore should not be annotated as such - it's
wrong with or without BTI; BTI just makes it very apparent.  It'd also
be better to split the change in linkage.h out into a separate patch;
that'd make things clearer for review.

>   CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1
>   pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-)
>   pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
>   lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs]
>   sp : ffff80001052b730
>   x29: ffff80001052b730 x28: 0000000000000001
>   x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0

Please think hard before including complete backtraces in upstream
reports; they are very large and contain almost no useful information
relative to their size, so they often obscure the relevant content in
your message. If part of the backtrace is usefully illustrative (it
often is for search engines, if nothing else) then it's usually better
to pull out the relevant sections.

> -SYM_FUNC_START_LOCAL(aesbs_encrypt8)
> +SYM_CODE_START_LOCAL(aesbs_encrypt8)
>  	ldr		q9, [bskey], #16		// round 0 key
>  	ldr		q8, M0SR
>  	ldr		q24, SR
> @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
>  	eor		v2.16b, v2.16b, v12.16b
>  	eor		v5.16b, v5.16b, v12.16b
>  	ret
> -SYM_FUNC_END(aesbs_encrypt8)
> +SYM_END(aesbs_encrypt8)

This should be SYM_CODE_END() to match the opening.  However...

>   * When using in-kernel BTI we need to ensure that PCS-conformant assembly
> @@ -42,6 +43,9 @@
>  	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
>  	BTI_C
>  
> +#define SYM_CODE_START_LOCAL(name)			\
> +	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)       \
> +	BTI_JC

...this is going to cause problems: SYM_CODE means that we should
assemble *exactly* what was written, since it's some non-standard thing -
we use it for the vectors table, for example.  Looking at the code, it's
not 100% clear that the best approach here isn't just to change the call
to a regular function call; this isn't a fast path or anything as far as
I can see, so it's unclear to me why we need to tail call.

Failing that, I think we need an annotation for tail-called functions;
that'd need to be a new thing, as I am not seeing anything appropriate in
the current generic annotations.
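
(For what it's worth, such an annotation might look something like the
sketch below; the macro name is hypothetical - nothing like it exists in
linkage.h at this point - and it just pairs the existing SYM_START()
plumbing with the 'bti jc' landing pad this patch introduces:)

/* hypothetical sketch only, not an existing kernel macro */
#define SYM_FUNC_START_LOCAL_TAILCALLED(name)		\
	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)	\
	BTI_JC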
Jeremy Linton Oct. 5, 2020, 8:10 p.m. UTC | #4
Hi,


On 10/5/20 2:59 PM, Mark Brown wrote:
> On Mon, Oct 05, 2020 at 01:18:04PM -0500, Jeremy Linton wrote:
> 
>> The AES code uses a 'br x7' as part of a function called by
>> a macro, that ends up needing a BTI_J as a target. Lets
>> define SYN_CODE_START_LOCAL() for this and replace the
>> SYM_FUNC_START_LOCAL with a SYM_FUNC_CODE_LOCAL in the AES block.
> 
> Really what the subject here should say is that this code is not a
> standard function and therefore should not be annotated as such - it's
> wrong with or without BTI, BTI just makes it very apparent.  It'd also
> be better to split the change in linkage.h out into a separate patch,
> that'd make things clearer for review.
> 
>>    CPU: 1 PID: 265 Comm: cryptomgr_test Not tainted 5.8.11-300.fc33.aarch64 #1
>>    pstate: 20400c05 (nzCv daif +PAN -UAO BTYPE=j-)
>>    pc : aesbs_encrypt8+0x0/0x5f0 [aes_neon_bs]
>>    lr : aesbs_xts_encrypt+0x48/0xe0 [aes_neon_bs]
>>    sp : ffff80001052b730
>>    x29: ffff80001052b730 x28: 0000000000000001
>>    x27: ffff0001ec8f4000 x26: ffff0001ec5d27b0
> 
> Please think hard before including complete backtraces in upstream
> reports, they are very large and contain almost no useful information
> relative to their size so often obscure the relevant content in your
> message. If part of the backtrace is usefully illustrative (it often is
> for search engines if nothing else) then it's usually better to pull out
> the relevant sections.
> 
>> -SYM_FUNC_START_LOCAL(aesbs_encrypt8)
>> +SYM_CODE_START_LOCAL(aesbs_encrypt8)
>>   	ldr		q9, [bskey], #16		// round 0 key
>>   	ldr		q8, M0SR
>>   	ldr		q24, SR
>> @@ -488,10 +488,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
>>   	eor		v2.16b, v2.16b, v12.16b
>>   	eor		v5.16b, v5.16b, v12.16b
>>   	ret
>> -SYM_FUNC_END(aesbs_encrypt8)
>> +SYM_END(aesbs_encrypt8)
> 
> This should be SYM_CODE_END() to match the opening.  However...
> 
>>    * When using in-kernel BTI we need to ensure that PCS-conformant assembly
>> @@ -42,6 +43,9 @@
>>   	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
>>   	BTI_C
>>   
>> +#define SYM_CODE_START_LOCAL(name)			\
>> +	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)       \
>> +	BTI_JC
> 
> ...this is going to cause problems, SYM_CODE means that we should
> assemble *exactly* what was written since it's some non-standard thing -
> we use it for the vectors table for example.  Looking at the code it's
> not 100% clear that the best approach here isn't just to change the call
> to a regular function call, this isn't a fast path or anything as far as
> I can see so it's unclear to me why we need to tail call.

Well, for some workloads it could be, AFAIK. OTOH, Ard mentioned dumping
the tail call too, and I think that is pretty reasonable. So it looks
like that is a better plan since it also avoids all this SYM_ flailing.


> 
> Failing that I think we need an annotation for tail called functions,
> that'd need to be a new thing as I am not seeing anything appropriate in
> the current generic annotations.
>
Mark Brown Oct. 5, 2020, 8:28 p.m. UTC | #5
On Mon, Oct 05, 2020 at 03:10:42PM -0500, Jeremy Linton wrote:
> On 10/5/20 2:59 PM, Mark Brown wrote:

> > ...this is going to cause problems, SYM_CODE means that we should
> > assemble *exactly* what was written since it's some non-standard thing -
> > we use it for the vectors table for example.  Looking at the code it's
> > not 100% clear that the best approach here isn't just to change the call
> > to a regular function call, this isn't a fast path or anything as far as
> > I can see so it's unclear to me why we need to tail call.

> Well for some workloads its could be AFAIK. OTOH, Ard mentioned dumping the
> tail call too, and I think that is pretty reasonable. So it looks like that
> is a better plan since it also avoids all this SYM_ flailing.

Yeah, I think that's the easiest thing all round.
kernel test robot Oct. 6, 2020, 7:23 a.m. UTC | #6
Hi Jeremy,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on arm64/for-next/core]
[also build test ERROR on soc/for-next arm/for-next kvmarm/next v5.9-rc8 next-20201002]
[cannot apply to xlnx/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Jeremy-Linton/arm64-bti-fix-BTI-to-handle-local-indirect-branches/20201006-021958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
config: arm64-allyesconfig (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/37211f8bd05c9ebcede89bb5c371e34920355d4f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Jeremy-Linton/arm64-bti-fix-BTI-to-handle-local-indirect-branches/20201006-021958
        git checkout 37211f8bd05c9ebcede89bb5c371e34920355d4f
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/arm64/crypto/aes-neonbs-core.S:491:23: error: macro "SYM_END" requires 2 arguments, but only 1 given
     491 | SYM_END(aesbs_encrypt8)
         |                       ^
   In file included from arch/arm64/crypto/aes-neonbs-core.S:17:
   include/linux/linkage.h:166: note: macro "SYM_END" defined here
     166 | #define SYM_END(name, sym_type)    \
         | 
   arch/arm64/crypto/aes-neonbs-core.S:556:23: error: macro "SYM_END" requires 2 arguments, but only 1 given
     556 | SYM_END(aesbs_decrypt8)
         |                       ^
   In file included from arch/arm64/crypto/aes-neonbs-core.S:17:
   include/linux/linkage.h:166: note: macro "SYM_END" defined here
     166 | #define SYM_END(name, sym_type)    \
         | 

vim +/SYM_END +491 arch/arm64/crypto/aes-neonbs-core.S

   429	
   430		.align		4
   431	SYM_CODE_START_LOCAL(aesbs_encrypt8)
   432		ldr		q9, [bskey], #16		// round 0 key
   433		ldr		q8, M0SR
   434		ldr		q24, SR
   435	
   436		eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
   437		eor		v11.16b, v1.16b, v9.16b
   438		tbl		v0.16b, {v10.16b}, v8.16b
   439		eor		v12.16b, v2.16b, v9.16b
   440		tbl		v1.16b, {v11.16b}, v8.16b
   441		eor		v13.16b, v3.16b, v9.16b
   442		tbl		v2.16b, {v12.16b}, v8.16b
   443		eor		v14.16b, v4.16b, v9.16b
   444		tbl		v3.16b, {v13.16b}, v8.16b
   445		eor		v15.16b, v5.16b, v9.16b
   446		tbl		v4.16b, {v14.16b}, v8.16b
   447		eor		v10.16b, v6.16b, v9.16b
   448		tbl		v5.16b, {v15.16b}, v8.16b
   449		eor		v11.16b, v7.16b, v9.16b
   450		tbl		v6.16b, {v10.16b}, v8.16b
   451		tbl		v7.16b, {v11.16b}, v8.16b
   452	
   453		bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
   454	
   455		sub		rounds, rounds, #1
   456		b		.Lenc_sbox
   457	
   458	.Lenc_loop:
   459		shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
   460	.Lenc_sbox:
   461		sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
   462									v13, v14, v15
   463		subs		rounds, rounds, #1
   464		b.cc		.Lenc_done
   465	
   466		enc_next_rk
   467	
   468		mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
   469									v13, v14, v15
   470	
   471		add_round_key	v0, v1, v2, v3, v4, v5, v6, v7
   472	
   473		b.ne		.Lenc_loop
   474		ldr		q24, SRM0
   475		b		.Lenc_loop
   476	
   477	.Lenc_done:
   478		ldr		q12, [bskey]			// last round key
   479	
   480		bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
   481	
   482		eor		v0.16b, v0.16b, v12.16b
   483		eor		v1.16b, v1.16b, v12.16b
   484		eor		v4.16b, v4.16b, v12.16b
   485		eor		v6.16b, v6.16b, v12.16b
   486		eor		v3.16b, v3.16b, v12.16b
   487		eor		v7.16b, v7.16b, v12.16b
   488		eor		v2.16b, v2.16b, v12.16b
   489		eor		v5.16b, v5.16b, v12.16b
   490		ret
 > 491	SYM_END(aesbs_encrypt8)
   492	
   493		.align		4
   494	SYM_CODE_START_LOCAL(aesbs_decrypt8)
   495		lsl		x9, rounds, #7
   496		add		bskey, bskey, x9
   497	
   498		ldr		q9, [bskey, #-112]!		// round 0 key
   499		ldr		q8, M0ISR
   500		ldr		q24, ISR
   501	
   502		eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
   503		eor		v11.16b, v1.16b, v9.16b
   504		tbl		v0.16b, {v10.16b}, v8.16b
   505		eor		v12.16b, v2.16b, v9.16b
   506		tbl		v1.16b, {v11.16b}, v8.16b
   507		eor		v13.16b, v3.16b, v9.16b
   508		tbl		v2.16b, {v12.16b}, v8.16b
   509		eor		v14.16b, v4.16b, v9.16b
   510		tbl		v3.16b, {v13.16b}, v8.16b
   511		eor		v15.16b, v5.16b, v9.16b
   512		tbl		v4.16b, {v14.16b}, v8.16b
   513		eor		v10.16b, v6.16b, v9.16b
   514		tbl		v5.16b, {v15.16b}, v8.16b
   515		eor		v11.16b, v7.16b, v9.16b
   516		tbl		v6.16b, {v10.16b}, v8.16b
   517		tbl		v7.16b, {v11.16b}, v8.16b
   518	
   519		bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
   520	
   521		sub		rounds, rounds, #1
   522		b		.Ldec_sbox
   523	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
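
(For reference, the failure above is because the patch closes the
SYM_CODE_START_LOCAL() blocks by calling the two-argument generic
SYM_END() directly; the matching closer is SYM_CODE_END(), which
supplies the type argument itself - a minimal sketch of the fix,
assuming the generic definitions in include/linux/linkage.h:)

-SYM_END(aesbs_encrypt8)
+SYM_CODE_END(aesbs_encrypt8)	/* expands to SYM_END(aesbs_encrypt8, SYM_T_NONE) */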
Dave Martin Oct. 6, 2020, 9:38 a.m. UTC | #7
On Mon, Oct 05, 2020 at 02:24:47PM -0500, Jeremy Linton wrote:
> Hi,
> 
> On 10/5/20 1:54 PM, Ard Biesheuvel wrote:
> >On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote:
> >>
> >>The AES code uses a 'br x7' as part of a function called by
> >>a macro, that ends up needing a BTI_J as a target.
> >
> >Could we instead just drop the tail call, i.e, replace it with a ret
> >and do a 'bl' after it returns? The indirect call does not really
> >serve a purpose here anyway
> 
> Yes, that is an option, it adds an extra ret. Which probably doesn't mean
> much in most cases. I assumed this code was optimized this way because it
> mattered somewhere.

Since this really does seem to be a tail-call and since x16 and x17
appear to be otherwise unused here, can we not just use x16 or x17
instead of x7?

This relies on there being no other calls to veneered functions in the
mix, but this code is all in a single section so that shouldn't be a
concern.

Due to the magic status of x16 and x17 in br instructions, the resulting
jump should be compatible with BTI c.  I think this matches how the
compiler should typically compile tail-calls.

Cheers
---Dave
Ard Biesheuvel Oct. 6, 2020, 9:50 a.m. UTC | #8
On Tue, 6 Oct 2020 at 11:38, Dave Martin <Dave.Martin@arm.com> wrote:
>
> On Mon, Oct 05, 2020 at 02:24:47PM -0500, Jeremy Linton wrote:
> > Hi,
> >
> > On 10/5/20 1:54 PM, Ard Biesheuvel wrote:
> > >On Mon, 5 Oct 2020 at 20:18, Jeremy Linton <jeremy.linton@arm.com> wrote:
> > >>
> > >>The AES code uses a 'br x7' as part of a function called by
> > >>a macro, that ends up needing a BTI_J as a target.
> > >
> > >Could we instead just drop the tail call, i.e, replace it with a ret
> > >and do a 'bl' after it returns? The indirect call does not really
> > >serve a purpose here anyway
> >
> > Yes, that is an option, it adds an extra ret. Which probably doesn't mean
> > much in most cases. I assumed this code was optimized this way because it
> > mattered somewhere.
>
> Since this really does seem to be a tail-call and since x16 and x17
> appear to be otherwise unused here, can we not just use x16 or x17
> instead of x7?
>
> This relies on there being no other calls to veneered functions in the
> mix, but this code is all in a single section so that shouldn't be a
> concern.
>
> Due to the magic status of x16 and x17 in br instructions, the resulting
> jump should be compatible with BTI c.  I think this matches how the
> compiler should typically compile tail-calls.
>

Ah, excellent point. That is definitely the cleanest fix.
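
(A sketch of that approach: keep the tail call but route it through one
of the intra-procedure-call registers, so the "bti c" already emitted at
the target remains a valid landing pad. How the target value gets into
the register is an assumption here, not the exact code:)

	// wherever the macro currently places the branch target in x7,
	// place it in x16 (or x17) instead and branch through that
-	br	x7		// br via x7 would need a "bti j" landing pad
+	br	x16		// br via x16/x17 is accepted by "bti c"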

Patch

diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index b357164379f6..76de87ee80e1 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -428,7 +428,7 @@  SYM_FUNC_START(aesbs_convert_key)
 SYM_FUNC_END(aesbs_convert_key)
 
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_encrypt8)
+SYM_CODE_START_LOCAL(aesbs_encrypt8)
 	ldr		q9, [bskey], #16		// round 0 key
 	ldr		q8, M0SR
 	ldr		q24, SR
@@ -488,10 +488,10 @@  SYM_FUNC_START_LOCAL(aesbs_encrypt8)
 	eor		v2.16b, v2.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_encrypt8)
+SYM_END(aesbs_encrypt8)
 
 	.align		4
-SYM_FUNC_START_LOCAL(aesbs_decrypt8)
+SYM_CODE_START_LOCAL(aesbs_decrypt8)
 	lsl		x9, rounds, #7
 	add		bskey, bskey, x9
 
@@ -553,7 +553,7 @@  SYM_FUNC_START_LOCAL(aesbs_decrypt8)
 	eor		v3.16b, v3.16b, v12.16b
 	eor		v5.16b, v5.16b, v12.16b
 	ret
-SYM_FUNC_END(aesbs_decrypt8)
+SYM_END(aesbs_decrypt8)
 
 	/*
 	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index ba89a9af820a..92c81ae8ac1e 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -11,7 +11,8 @@ 
  * set the architecture version to v8.5 we use the hint instruction
  * instead.
  */
-#define BTI_C hint 34 ;
+#define BTI_C  hint 34 ;
+#define BTI_JC hint 38 ;
 
 /*
  * When using in-kernel BTI we need to ensure that PCS-conformant assembly
@@ -42,6 +43,9 @@ 
 	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
 	BTI_C
 
+#define SYM_CODE_START_LOCAL(name)			\
+	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)       \
+	BTI_JC
 #endif
 
 /*