From patchwork Fri Mar 11 17:35:25 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Beulich X-Patchwork-Id: 8568221 Return-Path: X-Original-To: patchwork-xen-devel@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 5B25A9F1C0 for ; Fri, 11 Mar 2016 17:38:27 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 96E4120259 for ; Fri, 11 Mar 2016 17:38:25 +0000 (UTC) Received: from lists.xenproject.org (lists.xenproject.org [192.237.175.120]) (using TLSv1.2 with cipher AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C826E20253 for ; Fri, 11 Mar 2016 17:38:23 +0000 (UTC) Received: from localhost ([127.0.0.1] helo=lists.xenproject.org) by lists.xen.org with esmtp (Exim 4.84) (envelope-from ) id 1aeQyI-0004PL-EM; Fri, 11 Mar 2016 17:35:30 +0000 Received: from mail6.bemta14.messagelabs.com ([193.109.254.103]) by lists.xen.org with esmtp (Exim 4.84) (envelope-from ) id 1aeQyH-0004P7-7v for xen-devel@lists.xenproject.org; Fri, 11 Mar 2016 17:35:29 +0000 Received: from [193.109.254.147] by server-7.bemta-14.messagelabs.com id B2/D5-04065-0E103E65; Fri, 11 Mar 2016 17:35:28 +0000 X-Env-Sender: JBeulich@suse.com X-Msg-Ref: server-2.tower-27.messagelabs.com!1457717725!28594794!1 X-Originating-IP: [137.65.248.74] X-SpamReason: No, hits=0.0 required=7.0 tests= X-StarScan-Received: X-StarScan-Version: 8.11; banners=-,-,- X-VirusChecked: Checked Received: (qmail 37451 invoked from network); 11 Mar 2016 17:35:26 -0000 Received: from prv-mh.provo.novell.com (HELO prv-mh.provo.novell.com) (137.65.248.74) by server-2.tower-27.messagelabs.com with DHE-RSA-AES256-GCM-SHA384 encrypted SMTP; 11 Mar 2016 17:35:26 -0000 Received: from INET-PRV-MTA by 
prv-mh.provo.novell.com with Novell_GroupWise; Fri, 11 Mar 2016 10:35:24 -0700 Message-Id: <56E30FED02000078000DBB93@prv-mh.provo.novell.com> X-Mailer: Novell GroupWise Internet Agent 14.2.0 Date: Fri, 11 Mar 2016 10:35:25 -0700 From: "Jan Beulich" To: "xen-devel" References: <56E30EA102000078000DBB7F@prv-mh.provo.novell.com> In-Reply-To: <56E30EA102000078000DBB7F@prv-mh.provo.novell.com> Mime-Version: 1.0 Cc: Andrew Cooper , Keir Fraser Subject: [Xen-devel] [PATCH 3/3] x86emul: support MOVBE and CRC32 X-BeenThere: xen-devel@lists.xen.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Xen developer discussion List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Errors-To: xen-devel-bounces@lists.xen.org Sender: "Xen-devel" X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP The former in an attempt to at least gradually support all simple data movement instructions. The latter just because it shares the opcode with the former. Signed-off-by: Jan Beulich x86emul: support MOVBE and CRC32 The former in an attempt to at least gradually support all simple data movement instructions. The latter just because it shares the opcode with the former. Signed-off-by: Jan Beulich --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -78,7 +78,14 @@ static int cpuid( unsigned int *edx, struct x86_emulate_ctxt *ctxt) { + unsigned int leaf = *eax; + asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx)); + + /* The emulator doesn't itself use MOVBE, so we can always run the test. 
*/ + if ( leaf == 1 ) + *ecx |= 1U << 22; + return X86EMUL_OKAY; } @@ -605,6 +612,34 @@ int main(int argc, char **argv) printf("skipped\n"); #endif + printf("%-40s", "Testing movbe (%%ecx),%%eax..."); + instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + regs.eax = 0x11111111; + *res = 0x12345678; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x12345678) || + (regs.eax != 0x78563412) || + (regs.eflags != 0x200) || + (regs.eip != (unsigned long)&instr[4]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movbe %%ax,(%%ecx)..."); + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01; + regs.eip = (unsigned long)&instr[0]; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x12341234) || + (regs.eax != 0x78563412) || + (regs.eflags != 0x200) || + (regs.eip != (unsigned long)&instr[5]) ) + goto fail; + printf("okay\n"); + #define decl_insn(which) extern const unsigned char which[], which##_len[] #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \ #which ": " insn "\n" \ --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -12,6 +12,7 @@ typedef bool bool_t; #define BUG() abort() #define ASSERT assert +#define ASSERT_UNREACHABLE() assert(!__LINE__) #define cpu_has_amd_erratum(nr) 0 #define mark_regs_dirty(r) ((void)(r)) --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -16,6 +16,7 @@ CFLAGS += -msoft-float $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX) +$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2) $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT) $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE) 
$(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = { ImplicitOps, ImplicitOps, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, /* 0x38 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, + DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x47 */ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, @@ -1091,6 +1091,8 @@ static bool_t vcpu_has( #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0) #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13) +#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20) +#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22) #define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28) #ifdef __XEN__ @@ -1503,8 +1505,9 @@ x86_emulate( /* Shadow copy of register state. Committed on successful emulation. */ struct cpu_user_regs _regs = *ctxt->regs; - uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; + uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0; uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + enum { ext_none, ext_0f, ext_0f38 } ext = ext_none; union vex vex = {}; unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; bool_t lock_prefix = 0; @@ -1600,9 +1603,18 @@ x86_emulate( /* Two-byte opcode? */ if ( b == 0x0f ) { - twobyte = 1; b = insn_fetch_type(uint8_t); d = twobyte_table[b]; + switch ( b ) + { + default: + ext = ext_0f; + break; + case 0x38: + b = insn_fetch_type(uint8_t); + ext = ext_0f38; + break; + } } /* Unrecognised? 
*/ @@ -1619,7 +1631,7 @@ x86_emulate( modrm = insn_fetch_type(uint8_t); modrm_mod = (modrm & 0xc0) >> 6; - if ( !twobyte && ((b & ~1) == 0xc4) ) + if ( !ext && ((b & ~1) == 0xc4) ) switch ( def_ad_bytes ) { default: @@ -1665,12 +1677,12 @@ x86_emulate( rex_prefix |= REX_R; fail_if(vex.opcx != vex_0f); - twobyte = 1; + ext = ext_0f; b = insn_fetch_type(uint8_t); d = twobyte_table[b]; /* Unrecognised? */ - if ( d == 0 ) + if ( d == 0 || b == 0x38 ) goto cannot_emulate; modrm = insn_fetch_type(uint8_t); @@ -1756,7 +1768,7 @@ x86_emulate( { ea.mem.seg = x86_seg_ss; ea.mem.off += _regs.esp; - if ( !twobyte && (b == 0x8f) ) + if ( !ext && (b == 0x8f) ) /* POP computes its EA post increment. */ ea.mem.off += ((mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes); @@ -1791,12 +1803,12 @@ x86_emulate( ((op_bytes == 8) ? 4 : op_bytes); else if ( (d & SrcMask) == SrcImmByte ) ea.mem.off += 1; - else if ( !twobyte && ((b & 0xfe) == 0xf6) && + else if ( !ext && ((b & 0xfe) == 0xf6) && ((modrm_reg & 7) <= 1) ) /* Special case in Grp3: test has immediate operand. */ ea.mem.off += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes); - else if ( twobyte && ((b & 0xf7) == 0xa4) ) + else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) ) /* SHLD/SHRD with immediate byte third operand. */ ea.mem.off++; break; @@ -1815,7 +1827,9 @@ x86_emulate( ea.mem.seg = override_seg; /* Early operand adjustments. */ - if ( !twobyte ) + switch ( ext ) + { + case ext_none: switch ( b ) { case 0xf6 ... 0xf7: /* Grp3 */ @@ -1848,6 +1862,29 @@ x86_emulate( } break; } + break; + + case ext_0f: + break; + + case ext_0f38: + switch ( b ) + { + case 0xf0: /* movbe / crc32 */ + d |= repne_prefix() ? ByteOp : Mov; + break; + case 0xf1: /* movbe / crc32 */ + if ( !repne_prefix() ) + d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov; + break; + default: /* Until it is worth making this table based ... 
*/ + goto cannot_emulate; + } + break; + + default: + ASSERT_UNREACHABLE(); + } /* Decode and fetch the source operand: register, memory or immediate. */ switch ( d & SrcMask ) @@ -2006,8 +2043,18 @@ x86_emulate( break; } - if ( twobyte ) - goto twobyte_insn; + switch ( ext ) + { + case ext_none: + break; + case ext_0f: + goto ext_0f_insn; + case ext_0f38: + goto ext_0f38_insn; + default: + ASSERT_UNREACHABLE(); + goto cannot_emulate; + } switch ( b ) { @@ -2050,7 +2097,7 @@ x86_emulate( struct segment_register reg; src.val = x86_seg_es; push_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->read_segment == NULL); if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 ) return rc; @@ -2066,7 +2113,7 @@ x86_emulate( case 0x07: /* pop %%es */ src.val = x86_seg_es; pop_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->write_segment == NULL); /* 64-bit mode: POP defaults to a 64-bit operand. */ if ( mode_64bit() && (op_bytes == 4) ) @@ -2721,7 +2768,7 @@ x86_emulate( unsigned long sel; dst.val = x86_seg_es; les: /* dst.val identifies the segment */ - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); generate_exception_if(src.type != OP_MEM, EXC_UD, -1); if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes, &sel, 2, ctxt, ops)) != 0 ) @@ -3862,7 +3909,7 @@ x86_emulate( put_stub(stub); return rc; - twobyte_insn: + ext_0f_insn: switch ( b ) { case 0x00: /* Grp6 */ @@ -4765,6 +4812,72 @@ x86_emulate( } goto writeback; + ext_0f38_insn: + switch ( b ) + { + case 0xf0: case 0xf1: /* movbe / crc32 */ + generate_exception_if(repe_prefix(), EXC_UD, -1); + if ( repne_prefix() ) + { + /* crc32 */ +#ifdef HAVE_GAS_SSE4_2 + host_and_vcpu_must_have(sse4_2); + dst.bytes = rex_prefix & REX_W ? 
8 : 4; + switch ( op_bytes ) + { + case 1: + asm ( "crc32b %1,%k0" : "+r" (dst.val) + : "qm" (*(uint8_t *)&src.val) ); + break; + case 2: + asm ( "crc32w %1,%k0" : "+r" (dst.val) + : "rm" (*(uint16_t *)&src.val) ); + break; + case 4: + asm ( "crc32l %1,%k0" : "+r" (dst.val) + : "rm" (*(uint32_t *)&src.val) ); + break; +# ifdef __x86_64__ + case 8: + asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); + break; +# endif + default: + ASSERT_UNREACHABLE(); + } +#else /* !HAVE_GAS_SSE4_2 */ + goto cannot_emulate; +#endif + } + else + { + /* movbe */ + vcpu_must_have_movbe(); + switch ( op_bytes ) + { + case 2: + asm ( "xchg %h0,%b0" : "=Q" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 4: +#ifdef __x86_64__ + asm ( "bswap %k0" : "=r" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 8: +#endif + asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) ); + break; + default: + ASSERT_UNREACHABLE(); + } + } + break; + default: + goto cannot_emulate; + } + goto writeback; + cannot_emulate: _put_fpu(); put_stub(stub); --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -189,6 +189,7 @@ #define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE) #define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2) #define cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3) +#define cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp 1 #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -78,7 +78,14 @@ static int cpuid( unsigned int *edx, struct x86_emulate_ctxt *ctxt) { + unsigned int leaf = *eax; + asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx)); + + /* The emulator doesn't itself use MOVBE, so we can always run the test. 
*/ + if ( leaf == 1 ) + *ecx |= 1U << 22; + return X86EMUL_OKAY; } @@ -605,6 +612,34 @@ int main(int argc, char **argv) printf("skipped\n"); #endif + printf("%-40s", "Testing movbe (%%ecx),%%eax..."); + instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf0; instr[3] = 0x01; + regs.eflags = 0x200; + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + regs.eax = 0x11111111; + *res = 0x12345678; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x12345678) || + (regs.eax != 0x78563412) || + (regs.eflags != 0x200) || + (regs.eip != (unsigned long)&instr[4]) ) + goto fail; + printf("okay\n"); + + printf("%-40s", "Testing movbe %%ax,(%%ecx)..."); + instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf1; instr[4] = 0x01; + regs.eip = (unsigned long)&instr[0]; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || + (*res != 0x12341234) || + (regs.eax != 0x78563412) || + (regs.eflags != 0x200) || + (regs.eip != (unsigned long)&instr[5]) ) + goto fail; + printf("okay\n"); + #define decl_insn(which) extern const unsigned char which[], which##_len[] #define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \ #which ": " insn "\n" \ --- a/tools/tests/x86_emulator/x86_emulate.c +++ b/tools/tests/x86_emulator/x86_emulate.c @@ -12,6 +12,7 @@ typedef bool bool_t; #define BUG() abort() #define ASSERT assert +#define ASSERT_UNREACHABLE() assert(!__LINE__) #define cpu_has_amd_erratum(nr) 0 #define mark_regs_dirty(r) ((void)(r)) --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -16,6 +16,7 @@ CFLAGS += -msoft-float $(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS)) $(call cc-option-add,CFLAGS,CC,-Wnested-externs) $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX) +$(call as-insn-check,CFLAGS,CC,"crc32 %eax$$(comma)%eax",-DHAVE_GAS_SSE4_2) $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT) $(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE) 
$(call as-insn-check,CFLAGS,CC,".equ \"x\"$$(comma)1", \ --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -188,7 +188,7 @@ static uint8_t twobyte_table[256] = { ImplicitOps, ImplicitOps, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, /* 0x38 - 0x3F */ - 0, 0, 0, 0, 0, 0, 0, 0, + DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0, /* 0x40 - 0x47 */ DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov, @@ -1091,6 +1091,8 @@ static bool_t vcpu_has( #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0) #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13) +#define vcpu_must_have_sse4_2() vcpu_must_have(0x00000001, ECX, 20) +#define vcpu_must_have_movbe() vcpu_must_have(0x00000001, ECX, 22) #define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28) #ifdef __XEN__ @@ -1503,8 +1505,9 @@ x86_emulate( /* Shadow copy of register state. Committed on successful emulation. */ struct cpu_user_regs _regs = *ctxt->regs; - uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; + uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0; uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + enum { ext_none, ext_0f, ext_0f38 } ext = ext_none; union vex vex = {}; unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; bool_t lock_prefix = 0; @@ -1600,9 +1603,18 @@ x86_emulate( /* Two-byte opcode? */ if ( b == 0x0f ) { - twobyte = 1; b = insn_fetch_type(uint8_t); d = twobyte_table[b]; + switch ( b ) + { + default: + ext = ext_0f; + break; + case 0x38: + b = insn_fetch_type(uint8_t); + ext = ext_0f38; + break; + } } /* Unrecognised? 
*/ @@ -1619,7 +1631,7 @@ x86_emulate( modrm = insn_fetch_type(uint8_t); modrm_mod = (modrm & 0xc0) >> 6; - if ( !twobyte && ((b & ~1) == 0xc4) ) + if ( !ext && ((b & ~1) == 0xc4) ) switch ( def_ad_bytes ) { default: @@ -1665,12 +1677,12 @@ x86_emulate( rex_prefix |= REX_R; fail_if(vex.opcx != vex_0f); - twobyte = 1; + ext = ext_0f; b = insn_fetch_type(uint8_t); d = twobyte_table[b]; /* Unrecognised? */ - if ( d == 0 ) + if ( d == 0 || b == 0x38 ) goto cannot_emulate; modrm = insn_fetch_type(uint8_t); @@ -1756,7 +1768,7 @@ x86_emulate( { ea.mem.seg = x86_seg_ss; ea.mem.off += _regs.esp; - if ( !twobyte && (b == 0x8f) ) + if ( !ext && (b == 0x8f) ) /* POP computes its EA post increment. */ ea.mem.off += ((mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes); @@ -1791,12 +1803,12 @@ x86_emulate( ((op_bytes == 8) ? 4 : op_bytes); else if ( (d & SrcMask) == SrcImmByte ) ea.mem.off += 1; - else if ( !twobyte && ((b & 0xfe) == 0xf6) && + else if ( !ext && ((b & 0xfe) == 0xf6) && ((modrm_reg & 7) <= 1) ) /* Special case in Grp3: test has immediate operand. */ ea.mem.off += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes); - else if ( twobyte && ((b & 0xf7) == 0xa4) ) + else if ( ext == ext_0f && ((b & 0xf7) == 0xa4) ) /* SHLD/SHRD with immediate byte third operand. */ ea.mem.off++; break; @@ -1815,7 +1827,9 @@ x86_emulate( ea.mem.seg = override_seg; /* Early operand adjustments. */ - if ( !twobyte ) + switch ( ext ) + { + case ext_none: switch ( b ) { case 0xf6 ... 0xf7: /* Grp3 */ @@ -1848,6 +1862,29 @@ x86_emulate( } break; } + break; + + case ext_0f: + break; + + case ext_0f38: + switch ( b ) + { + case 0xf0: /* movbe / crc32 */ + d |= repne_prefix() ? ByteOp : Mov; + break; + case 0xf1: /* movbe / crc32 */ + if ( !repne_prefix() ) + d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov; + break; + default: /* Until it is worth making this table based ... 
*/ + goto cannot_emulate; + } + break; + + default: + ASSERT_UNREACHABLE(); + } /* Decode and fetch the source operand: register, memory or immediate. */ switch ( d & SrcMask ) @@ -2006,8 +2043,18 @@ x86_emulate( break; } - if ( twobyte ) - goto twobyte_insn; + switch ( ext ) + { + case ext_none: + break; + case ext_0f: + goto ext_0f_insn; + case ext_0f38: + goto ext_0f38_insn; + default: + ASSERT_UNREACHABLE(); + goto cannot_emulate; + } switch ( b ) { @@ -2050,7 +2097,7 @@ x86_emulate( struct segment_register reg; src.val = x86_seg_es; push_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->read_segment == NULL); if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 ) return rc; @@ -2066,7 +2113,7 @@ x86_emulate( case 0x07: /* pop %%es */ src.val = x86_seg_es; pop_seg: - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); fail_if(ops->write_segment == NULL); /* 64-bit mode: POP defaults to a 64-bit operand. */ if ( mode_64bit() && (op_bytes == 4) ) @@ -2721,7 +2768,7 @@ x86_emulate( unsigned long sel; dst.val = x86_seg_es; les: /* dst.val identifies the segment */ - generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1); + generate_exception_if(mode_64bit() && !ext, EXC_UD, -1); generate_exception_if(src.type != OP_MEM, EXC_UD, -1); if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes, &sel, 2, ctxt, ops)) != 0 ) @@ -3862,7 +3909,7 @@ x86_emulate( put_stub(stub); return rc; - twobyte_insn: + ext_0f_insn: switch ( b ) { case 0x00: /* Grp6 */ @@ -4765,6 +4812,72 @@ x86_emulate( } goto writeback; + ext_0f38_insn: + switch ( b ) + { + case 0xf0: case 0xf1: /* movbe / crc32 */ + generate_exception_if(repe_prefix(), EXC_UD, -1); + if ( repne_prefix() ) + { + /* crc32 */ +#ifdef HAVE_GAS_SSE4_2 + host_and_vcpu_must_have(sse4_2); + dst.bytes = rex_prefix & REX_W ? 
8 : 4; + switch ( op_bytes ) + { + case 1: + asm ( "crc32b %1,%k0" : "+r" (dst.val) + : "qm" (*(uint8_t *)&src.val) ); + break; + case 2: + asm ( "crc32w %1,%k0" : "+r" (dst.val) + : "rm" (*(uint16_t *)&src.val) ); + break; + case 4: + asm ( "crc32l %1,%k0" : "+r" (dst.val) + : "rm" (*(uint32_t *)&src.val) ); + break; +# ifdef __x86_64__ + case 8: + asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) ); + break; +# endif + default: + ASSERT_UNREACHABLE(); + } +#else /* !HAVE_GAS_SSE4_2 */ + goto cannot_emulate; +#endif + } + else + { + /* movbe */ + vcpu_must_have_movbe(); + switch ( op_bytes ) + { + case 2: + asm ( "xchg %h0,%b0" : "=Q" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 4: +#ifdef __x86_64__ + asm ( "bswap %k0" : "=r" (dst.val) + : "0" (*(uint32_t *)&src.val) ); + break; + case 8: +#endif + asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) ); + break; + default: + ASSERT_UNREACHABLE(); + } + } + break; + default: + goto cannot_emulate; + } + goto writeback; + cannot_emulate: _put_fpu(); put_stub(stub); --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -189,6 +189,7 @@ #define cpu_has_sse boot_cpu_has(X86_FEATURE_SSE) #define cpu_has_sse2 boot_cpu_has(X86_FEATURE_SSE2) #define cpu_has_sse3 boot_cpu_has(X86_FEATURE_SSE3) +#define cpu_has_sse4_2 boot_cpu_has(X86_FEATURE_SSE4_2) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp 1 #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX)