@@ -4763,8 +4763,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
{
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
- bool op_prefix = false;
+ int def_op_bytes, def_ad_bytes, goffset;
+ u8 simd_prefix = 0;
bool has_seg_override = false;
struct opcode opcode;
u16 dummy;
@@ -4816,7 +4816,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
for (;;) {
switch (ctxt->b = insn_fetch(u8, ctxt)) {
case 0x66: /* operand-size override */
- op_prefix = true;
+ /*
+ * Believe it or not, f2/f3 are more equal than 66.
+ * Homework:
+ * 1. [M21] Write a program to show this effect.
+ * 2. [M49] Find corresponding clause in SDM.
+ */
+ simd_prefix = simd_prefix ?: 0x66;
/* switch between 2/4 bytes */
ctxt->op_bytes = def_op_bytes ^ 6;
break;
@@ -4862,7 +4868,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
break;
case 0xf2: /* REPNE/REPNZ */
case 0xf3: /* REP/REPE/REPZ */
- ctxt->rep_prefix = ctxt->b;
+ ctxt->rep_prefix = simd_prefix = ctxt->b;
break;
default:
goto done_prefixes;
@@ -4923,9 +4929,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int
opcode = opcode.u.group[goffset];
break;
case Prefix:
- if (ctxt->rep_prefix && op_prefix)
- return EMULATION_FAILED;
- simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
switch (simd_prefix) {
case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
x86_decode_insn() can decode SSE instructions with multiple mandatory prefixes incorrectly. Apparently, f2/f3 overturns 66 regardless of relative position. Current code does the opposite: 66 wins and refuses to emulate instruction with both 66 and f2/f3 for no reason. This is easy to see switching between movdqa/movdqu with unaligned loads. .intel_syntax noprefix .global f f: push rbp mov rbp, rsp and rsp, ~15 add rsp, 1 #.byte 0x66, 0x0f, 0x6f, 0x04, 0x24 # SIGSEGV movdqa xmm0, [rsp] .byte 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] #.byte 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24 # SIGSEGV movdqa xmm0, [rsp] .byte 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] #.byte 0x66, 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24 # SIGSEGV movdqa xmm0, [rsp] .byte 0x66, 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0x66, 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0x66, 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0x66, 0x66, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0x66, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0xf3, 0x66, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] .byte 0xf3, 0xf3, 0xf3, 0x0f, 0x6f, 0x04, 0x24 # OK movdqu xmm0, [rsp] mov rsp, rbp pop rbp ret Tentative patch below (how do you even test it?). References: objdump: opcodes/i386-dis.c get_valid_dis386() bochs: bochs/cpu/decoder/fetchdecode64.cc fetchDecode64() Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> --- Do we even care given that compiler won't emit such instruction? arch/x86/kvm/emulate.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)