diff mbox series

[2/3] crypto: x86/twofish-3way - Perform cbc xor in assembly

Message ID 80b492462c1741c384f326a01856791a517bc251.1675653010.git.peter@n8pjl.ca (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Headers show
Series crypto: x86/twofish-3way - Cleanup and optimize asm | expand

Commit Message

Peter Lafreniere Feb. 6, 2023, 3:31 a.m. UTC
Optimize twofish-3way cbc decryption by keeping intermediate results in
registers until all computations are finished, rather than storing them
from assembly and then immediately reloading them in the glue code.
Additionally, performing the xor in assembly avoids a memcpy() call when
the decryption is done in place.

cbc decryption speedups (tcrypt mode=202 on a znver1):
64B: +7.7%, 128B: +6.3%, 256B: +6.8%

Signed-off-by: Peter Lafreniere <peter@n8pjl.ca>
---
 arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 33 ++++++++++++++++++--
 arch/x86/crypto/twofish.h                    | 16 ++++++++--
 arch/x86/crypto/twofish_glue_3way.c          | 15 +--------
 3 files changed, 45 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index fa11513dbbf1..29e0fe664386 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -220,6 +220,20 @@ 
 	rorq $32,			RAB2; \
 	outunpack3(mov, RIO, 2, RAB, 2);
 
+#define outunpack_cbc_dec3() /* cbc output: xors 32 bytes at (RT1) into RCD1/RAB1/RCD2/RAB2 */ \
+	rorq $32,			RCD0; \
+	rorq $32,			RCD1; \
+	xorq (RT1),			RCD1; /* ^= 8 bytes at RT1+0 */ \
+	rorq $32,			RCD2; \
+	xorq 16(RT1),			RCD2; /* ^= 8 bytes at RT1+16 */ \
+	outunpack3(mov, RIO, 0, RCD, 0); \
+	rorq $32,			RAB0; /* RAB0/RCD0 are only rotated, never xored here */ \
+	rorq $32,			RAB1; \
+	xorq 8(RT1),			RAB1; /* ^= 8 bytes at RT1+8; NOTE(review): read after the RCD store above - confirm safe when RIO == RT1 */ \
+	rorq $32,			RAB2; \
+	xorq 24(RT1),			RAB2; /* ^= 8 bytes at RT1+24 */ \
+	outunpack3(mov, RIO, 2, RAB, 2);
+
 SYM_FUNC_START(twofish_enc_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -255,17 +269,20 @@  SYM_FUNC_START(twofish_enc_blk_3way)
 	RET;
 SYM_FUNC_END(twofish_enc_blk_3way)
 
-SYM_FUNC_START(twofish_dec_blk_3way)
+SYM_FUNC_START(__twofish_dec_blk_3way)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: dst
 	 *	%rdx: src, RIO
+	 *	%rcx: cbc (bool)
 	 */
 	pushq %r13;
 	pushq %r12;
 	pushq %rbx;
 
 	pushq %rsi; /* dst */
+	pushq %rdx; /* src */
+	pushq %rcx; /* cbc */
 
 	inpack_dec3();
 
@@ -280,12 +297,24 @@  SYM_FUNC_START(twofish_dec_blk_3way)
 	decrypt_cycle3(RAB, CD, 0);
 	pop_cd();
 
+	popq RT0; /* cbc */
+	popq RT1; /* src */
 	popq RIO; /* dst */
 
+	testq RT0, RT0;
+	jnz .L_dec_cbc;
+
 	outunpack_dec3();
 
 	popq %rbx;
 	popq %r12;
 	popq %r13;
 	RET;
-SYM_FUNC_END(twofish_dec_blk_3way)
+
+.L_dec_cbc:
+	outunpack_cbc_dec3();
+	popq %rbx;
+	popq %r12;
+	popq %r13;
+	RET;
+SYM_FUNC_END(__twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish.h b/arch/x86/crypto/twofish.h
index feb0a6f820a6..ede02a8b36d4 100644
--- a/arch/x86/crypto/twofish.h
+++ b/arch/x86/crypto/twofish.h
@@ -12,9 +12,19 @@  asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
 
 /* 3-way parallel cipher functions */
 asmlinkage void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src);
-asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void __twofish_dec_blk_3way(const void *ctx, u8 *dst,
+				       const u8 *src, bool cbc);
 
-/* helpers from twofish_x86_64-3way module */
-extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+/* inline wrappers around __twofish_dec_blk_3way(): plain and cbc variants */
+static inline void twofish_dec_blk_3way(const void *ctx, u8 *dst,
+					const u8 *src)
+{
+	__twofish_dec_blk_3way(ctx, dst, src, false); /* cbc xor disabled */
+}
+static inline void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst,
+					    const u8 *src)
+{
+	__twofish_dec_blk_3way(ctx, dst, src, true); /* cbc xor done in asm */
+}
 
 #endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index c331c4ca9363..1f6944620dc5 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -16,7 +16,7 @@ 
 #include "ecb_cbc_helpers.h"
 
 EXPORT_SYMBOL_GPL(twofish_enc_blk_3way);
-EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
+EXPORT_SYMBOL_GPL(__twofish_dec_blk_3way);
 
 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
 				   const u8 *key, unsigned int keylen)
@@ -24,19 +24,6 @@  static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
 	return twofish_setkey(&tfm->base, key, keylen);
 }
 
-void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src)
-{
-	u8 buf[2][TF_BLOCK_SIZE];
-	const u8 *s = src;
-
-	if (dst == src)
-		s = memcpy(buf, src, sizeof(buf));
-	twofish_dec_blk_3way(ctx, dst, src);
-	crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf));
-
-}
-EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
-
 static int ecb_encrypt(struct skcipher_request *req)
 {
 	ECB_WALK_START(req, TF_BLOCK_SIZE, -1);