@@ -150,6 +150,15 @@
#define dummy2(a, b) /*_*/
+#ifdef CONFIG_MARCH_NATIVE_MOVBE /* MOVBE variants: byte swap is folded into the memory access */
+#define read_block(io, left, right) /* load the 32-bit words at io and io+4 into left##d / right##d */ \
+ movbe (io), left##d; \
+ movbe 4(io), right##d;
+
+#define write_block(io, left, right) /* store left##d / right##d to io and io+4 */ \
+ movbe left##d, (io); \
+ movbe right##d, 4(io);
+#else
#define read_block(io, left, right) \
movl (io), left##d; \
movl 4(io), right##d; \
@@ -161,6 +170,7 @@
bswapl right##d; \
movl left##d, (io); \
movl right##d, 4(io);
+#endif
ENTRY(des3_ede_x86_64_crypt_blk)
/* input:
@@ -434,6 +444,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
pushq %rsi /* dst */
/* load input */
+#ifdef CONFIG_MARCH_NATIVE_MOVBE /* MOVBE path: load six consecutive 32-bit words from (%rdx) */
+ movbe 0 * 4(%rdx), RL0d; /* block 0 */
+ movbe 1 * 4(%rdx), RR0d;
+ movbe 2 * 4(%rdx), RL1d; /* block 1 */
+ movbe 3 * 4(%rdx), RR1d;
+ movbe 4 * 4(%rdx), RL2d; /* block 2 */
+ movbe 5 * 4(%rdx), RR2d;
+#else
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
movl 2 * 4(%rdx), RL1d;
@@ -447,6 +465,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
bswapl RR1d;
bswapl RL2d;
bswapl RR2d;
+#endif
initial_permutation3(RL, RR);
@@ -507,6 +526,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
final_permutation3(RR, RL);
+#ifdef CONFIG_MARCH_NATIVE_MOVBE /* MOVBE path: store six 32-bit words to (%rsi) */
+ movbe RR0d, 0 * 4(%rsi); /* RR goes to the even word, RL to the odd word of each pair */
+ movbe RL0d, 1 * 4(%rsi);
+ movbe RR1d, 2 * 4(%rsi);
+ movbe RL1d, 3 * 4(%rsi);
+ movbe RR2d, 4 * 4(%rsi);
+ movbe RL2d, 5 * 4(%rsi);
+#else
bswapl RR0d;
bswapl RL0d;
bswapl RR1d;
@@ -521,6 +548,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
movl RL1d, 3 * 4(%rsi);
movl RR2d, 4 * 4(%rsi);
movl RL2d, 5 * 4(%rsi);
+#endif
popq %r15;
popq %r14;
@@ -142,6 +142,13 @@ ENTRY(verify_cpu)
jnc .Lverify_cpu_no_longmode
#endif
+#ifdef CONFIG_MARCH_NATIVE_MOVBE /* kernel was built to use MOVBE: check the CPU has it */
+ mov $1, %eax /* CPUID leaf 1 */
+ cpuid
+ bt $22, %ecx /* CF = ECX bit 22, the MOVBE feature flag */
+ jnc .Lverify_cpu_no_longmode /* bit clear: jump to the no-longmode label */
+#endif
+
#if defined(CONFIG_MARCH_NATIVE_REP_MOVSB) || defined(CONFIG_MARCH_NATIVE_REP_STOSB)
xor %eax, %eax
cpuid
@@ -43,6 +43,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
);
}
+static bool movbe = false; /* true when CPUID leaf 1 reports MOVBE (ECX bit 22) */
static bool popcnt = false;
static bool rep_movsb = false;
static bool rep_stosb = false;
@@ -57,6 +58,9 @@ static void intel(void)
cpuid(1, &eax, &ecx, &edx, &ebx);
// printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
+ if (ecx & (1 << 22)) { // MOVBE feature bit (CPUID.01H:ECX[22])
+ movbe = true;
+ }
if (ecx & (1 << 23)) {
popcnt = true;
}
@@ -89,6 +93,7 @@ int main(int argc, char *argv[])
}
#define _(x) if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+ _(movbe); // opt == "movbe": exit status is EXIT_SUCCESS iff the movbe flag is set
_(popcnt);
_(rep_movsb);
_(rep_stosb);
@@ -41,6 +41,7 @@ COLLECT_GCC_OPTIONS=$(
)
echo "-march=native: $COLLECT_GCC_OPTIONS"
+"$CPUID" movbe && option "CONFIG_MARCH_NATIVE_MOVBE" # call option only if the "$CPUID" movbe probe exits successfully
"$CPUID" popcnt && option "CONFIG_MARCH_NATIVE_POPCNT"
"$CPUID" rep_movsb && option "CONFIG_MARCH_NATIVE_REP_MOVSB"
"$CPUID" rep_stosb && option "CONFIG_MARCH_NATIVE_REP_STOSB"
Use MOVBE if it is available.

Internally, MOVBE probably translates to MOV+BSWAP anyway, but who knows.
Do it because it is easy to do...

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
 arch/x86/crypto/des3_ede-asm_64.S | 28 ++++++++++++++++++++++++++++
 arch/x86/kernel/verify_cpu.S      |  7 +++++++
 scripts/kconfig/cpuid.c           |  5 +++++
 scripts/march-native.sh           |  1 +
 4 files changed, 41 insertions(+)