@@ -60,7 +60,7 @@ SYM_FUNC_START(ce_aes_ccm_final)
ret
SYM_FUNC_END(ce_aes_ccm_final)
- .macro aes_ccm_do_crypt,enc
+SYM_FUNC_START_LOCAL(aes_ccm_do_crypt)
load_round_keys x3, w4, x10
cbz x2, 5f
@@ -76,28 +76,24 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
aes_encrypt v0, v1, w4
+ eor v0.16b, v0.16b, v5.16b /* final round mac */
+ eor v1.16b, v1.16b, v5.16b /* final round enc */
subs w2, w2, #16
bmi 6f /* partial block? */
ld1 {v2.16b}, [x1], #16 /* load next input block */
- .if \enc == 1
- eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
- eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
- .else
- eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
- eor v6.16b, v2.16b, v5.16b /* final round enc */
- .endif
- eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
+ eor v6.16b, v2.16b, v1.16b /* en/decrypt input block */
+ mov v23.16b, v22.16b
+ bsl v23.16b, v2.16b, v6.16b /* select plaintext */
st1 {v6.16b}, [x0], #16 /* write output block */
+ eor v0.16b, v0.16b, v23.16b /* fold plaintext into mac */
+
bne 0b
CPU_LE( rev x8, x8 )
st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
-6: eor v0.16b, v0.16b, v5.16b /* final round mac */
- eor v1.16b, v1.16b, v5.16b /* final round enc */
-
- add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
+6: add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
add x0, x0, w2, sxtw /* rewind the output pointer */
adr_l x8, .Lpermute /* load permute vectors */
@@ -108,20 +104,17 @@ CPU_LE( rev x8, x8 )
ld1 {v2.16b}, [x1] /* load a full block of input */
tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
- .if \enc == 1
- tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
+ tbl v7.16b, {v2.16b}, v9.16b /* copy input block to start of v7 */
eor v2.16b, v2.16b, v1.16b /* encrypt partial input block */
- .else
- eor v2.16b, v2.16b, v1.16b /* decrypt partial input block */
- tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
- .endif
- eor v0.16b, v0.16b, v7.16b /* fold plaintext into mac */
+ tbl v9.16b, {v2.16b}, v9.16b /* copy output block to start of v9 */
+ bsl v22.16b, v7.16b, v9.16b /* select plaintext */
+ eor v0.16b, v0.16b, v22.16b /* fold plaintext into mac */
tbx v2.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
st1 {v0.16b}, [x5] /* store mac */
st1 {v2.16b}, [x0] /* store output block */
ret
- .endm
+SYM_FUNC_END(aes_ccm_do_crypt)
/*
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
@@ -132,11 +125,13 @@ CPU_LE( rev x8, x8 )
* u8 ctr[]);
*/
SYM_FUNC_START(ce_aes_ccm_encrypt)
- aes_ccm_do_crypt 1
+ movi v22.16b, #255 /* all-ones mask: BSL in aes_ccm_do_crypt picks the input block (plaintext) to fold into the MAC */
+ b aes_ccm_do_crypt /* tail call into the shared en/decrypt routine */
SYM_FUNC_END(ce_aes_ccm_encrypt)
SYM_FUNC_START(ce_aes_ccm_decrypt)
- aes_ccm_do_crypt 0
+ movi v22.16b, #0 /* all-zeroes mask: BSL in aes_ccm_do_crypt picks the decrypted output block (plaintext) to fold into the MAC */
+ b aes_ccm_do_crypt /* tail call into the shared en/decrypt routine */
SYM_FUNC_END(ce_aes_ccm_decrypt)
.section ".rodata", "a"