diff mbox series

kvm: fix multiple SIMD prefixes decoding

Message ID 4d583cfe-eb6a-4433-8ab1-367f1e25263b@p183 (mailing list archive)
State New, archived
Headers show
Series kvm: fix multiple SIMD prefixes decoding | expand

Commit Message

Alexey Dobriyan May 29, 2023, 5:41 p.m. UTC
x86_decode_insn() can decode SSE instructions with multiple mandatory
prefixes incorrectly. Apparently, f2/f3 overturns 66 regardless of
relative position. Current code does the opposite: 66 wins and refuses
to emulate instruction with both 66 and f2/f3 for no reason.

This is easy to see switching between movdqa/movdqu with unaligned loads.

.intel_syntax noprefix
.global f
f:
	push	rbp
	mov	rbp, rsp
	and	rsp, ~15
	add	rsp, 1

	#.byte 0x66, 0x0f, 0x6f, 0x04, 0x24		# SIGSEGV movdqa xmm0, [rsp]
	.byte 0xf3, 0x0f, 0x6f, 0x04, 0x24		# OK movdqu xmm0, [rsp]

	#.byte 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24	# SIGSEGV movdqa xmm0, [rsp]
	.byte 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]

	#.byte 0x66, 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24	# SIGSEGV movdqa xmm0, [rsp]
	.byte 0x66, 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0x66, 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0x66, 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]
	.byte 0xf3, 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24	# OK movdqu xmm0, [rsp]

	mov	rsp, rbp
	pop	rbp
	ret

Tentative patch below (how do you even test it?).

References:

objdump: opcodes/i386-dis.c get_valid_dis386()
bochs: bochs/cpu/decoder/fetchdecode64.cc fetchDecode64()

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

	Do we even care given that compiler won't emit such instruction?

 arch/x86/kvm/emulate.c |   17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
diff mbox series

Patch

--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4763,8 +4763,8 @@  int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 {
 	int rc = X86EMUL_CONTINUE;
 	int mode = ctxt->mode;
-	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
-	bool op_prefix = false;
+	int def_op_bytes, def_ad_bytes, goffset;
+	u8 simd_prefix = 0;
 	bool has_seg_override = false;
 	struct opcode opcode;
 	u16 dummy;
@@ -4816,7 +4816,13 @@  int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 	for (;;) {
 		switch (ctxt->b = insn_fetch(u8, ctxt)) {
 		case 0x66:	/* operand-size override */
-			op_prefix = true;
+			/*
+			 * Believe it or not, f2/f3 are more equal than 66.
+			 * Homework:
+			 * 1. [M21] Write a program to show this effect.
+			 * 2. [M49] Find corresponding clause in SDM.
+			 */
+			simd_prefix = simd_prefix ?: 0x66;
 			/* switch between 2/4 bytes */
 			ctxt->op_bytes = def_op_bytes ^ 6;
 			break;
@@ -4862,7 +4868,7 @@  int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 			break;
 		case 0xf2:	/* REPNE/REPNZ */
 		case 0xf3:	/* REP/REPE/REPZ */
-			ctxt->rep_prefix = ctxt->b;
+			ctxt->rep_prefix = simd_prefix = ctxt->b;
 			break;
 		default:
 			goto done_prefixes;
@@ -4923,9 +4929,6 @@  int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
 			opcode = opcode.u.group[goffset];
 			break;
 		case Prefix:
-			if (ctxt->rep_prefix && op_prefix)
-				return EMULATION_FAILED;
-			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
 			switch (simd_prefix) {
 			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
 			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;