@@ -232,6 +232,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
{"avx-vnni", 0x00000007, 1, CPUID_REG_EAX, 4, 1},
{"avx512-bf16", 0x00000007, 1, CPUID_REG_EAX, 5, 1},
+ {"cmpccxadd", 0x00000007, 1, CPUID_REG_EAX, 7, 1},
{"fzrm", 0x00000007, 1, CPUID_REG_EAX, 10, 1},
{"fsrs", 0x00000007, 1, CPUID_REG_EAX, 11, 1},
{"fsrcs", 0x00000007, 1, CPUID_REG_EAX, 12, 1},
@@ -186,6 +186,7 @@ static const char *const str_7d0[32] =
static const char *const str_7a1[32] =
{
[ 4] = "avx-vnni", [ 5] = "avx512-bf16",
+ /* 6 */ [ 7] = "cmpccxadd",
[10] = "fzrm", [11] = "fsrs",
[12] = "fsrcs",
@@ -1388,6 +1388,22 @@ static const struct vex {
{ { 0xdd }, 2, T, R, pfx_66, WIG, Ln }, /* vaesenclast */
{ { 0xde }, 2, T, R, pfx_66, WIG, Ln }, /* vaesdec */
{ { 0xdf }, 2, T, R, pfx_66, WIG, Ln }, /* vaesdeclast */
+ { { 0xe0 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpoxadd */
+ { { 0xe1 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpnoxadd */
+ { { 0xe2 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpbxadd */
+ { { 0xe3 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpnbxadd */
+ { { 0xe4 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpexadd */
+ { { 0xe5 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpnexadd */
+ { { 0xe6 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpbexadd */
+ { { 0xe7 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpaxadd */
+ { { 0xe8 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpsxadd */
+ { { 0xe9 }, 2, F, W, pfx_66, Wn, L0 }, /* cmpnsxadd */
+ { { 0xea }, 2, F, W, pfx_66, Wn, L0 }, /* cmppxadd */
+ { { 0xeb }, 2, F, W, pfx_66, Wn, L0 }, /* cmpnpxadd */
+ { { 0xec }, 2, F, W, pfx_66, Wn, L0 }, /* cmplxadd */
+ { { 0xed }, 2, F, W, pfx_66, Wn, L0 }, /* cmpgexadd */
+ { { 0xee }, 2, F, W, pfx_66, Wn, L0 }, /* cmplexadd */
+ { { 0xef }, 2, F, W, pfx_66, Wn, L0 }, /* cmpgxadd */
{ { 0xf2 }, 2, T, R, pfx_no, Wn, L0 }, /* andn */
{ { 0xf3, 0x08 }, 2, T, R, pfx_no, Wn, L0 }, /* blsr */
{ { 0xf3, 0x10 }, 2, T, R, pfx_no, Wn, L0 }, /* blsmsk */
@@ -1398,6 +1398,78 @@ int main(int argc, char **argv)
}
printf("okay\n");
+ printf("%-40s", "Testing cmpbxadd %rbx,%r9,(%rdx)...");
+ if ( stack_exec && cpu_has_cmpccxadd ) /* otherwise a single "skipped" is printed for both tests below */
+ {
+ instr[0] = 0xc4; instr[1] = 0x62; instr[2] = 0xe1; instr[3] = 0xe2; instr[4] = 0x0a; /* 5-byte encoding of the insn named above */
+ regs.rip = (unsigned long)&instr[0];
+ regs.eflags = EFLAGS_ALWAYS_SET; /* start with no status flags set */
+ res[0] = 0x11223344; /* initial memory contents at (%rdx), see regs.rdx below */
+ res[1] = 0x01020304;
+ regs.rdx = (unsigned long)res;
+ regs.r9 = 0x0001020300112233UL;
+ regs.rbx = 0x0101010101010101UL;
+ rc = x86_emulate(&ctxt, &emulops); /* 1st run: r9 expected to change, rbx and memory expected unchanged, only PF set */
+ if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[5]) ||
+ (regs.r9 != 0x0102030411223344UL) ||
+ (regs.rbx != 0x0101010101010101UL) ||
+ ((regs.eflags & EFLAGS_MASK) !=
+ (X86_EFLAGS_PF | EFLAGS_ALWAYS_SET)) ||
+ (res[0] != 0x11223344) ||
+ (res[1] != 0x01020304) )
+ goto fail;
+
+ regs.rip = (unsigned long)&instr[0]; /* re-run the same insn ... */
+ regs.r9 <<= 8; /* ... with a different value in r9 */
+ rc = x86_emulate(&ctxt, &emulops); /* 2nd run: memory expected to be modified, CF, PF and SF set */
+ if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[5]) ||
+ (regs.r9 != 0x0102030411223344UL) ||
+ (regs.rbx != 0x0101010101010101UL) ||
+ ((regs.eflags & EFLAGS_MASK) !=
+ (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_SF |
+ EFLAGS_ALWAYS_SET)) ||
+ (res[0] != 0x12233445) ||
+ (res[1] != 0x02030405) )
+ goto fail;
+ printf("okay\n");
+
+ printf("%-40s", "Testing cmpsxadd %r9d,%ebx,4(%r10)...");
+ instr[1] = 0xc2; instr[2] = 0x31; instr[3] = 0xe8; instr[4] = 0x5a; instr[5] = 0x04; /* instr[0] is retained; the insn is now 6 bytes long */
+ regs.rip = (unsigned long)&instr[0];
+ res[2] = res[0] = ~0; /* neighbours of res[1]; checked to still be ~0 via (res[n] + 1) below */
+ regs.r10 = (unsigned long)res; /* 4(%r10) is presumably res[1], the only element expected to change */
+ rc = x86_emulate(&ctxt, &emulops); /* 1st run: rbx expected to change, memory expected unchanged, no status flags set */
+ if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[6]) ||
+ (regs.r9 != 0x0102030411223344UL) ||
+ (regs.rbx != 0x02030405) ||
+ ((regs.eflags & EFLAGS_MASK) != EFLAGS_ALWAYS_SET) ||
+ (res[0] + 1) || /* non-zero unless res[0] is still ~0 */
+ (res[1] != 0x02030405) ||
+ (res[2] + 1) ) /* likewise for res[2] */
+ goto fail;
+
+ regs.rip = (unsigned long)&instr[0]; /* re-run the same insn ... */
+ regs.rbx <<= 8; /* ... with a different value in rbx */
+ rc = x86_emulate(&ctxt, &emulops); /* 2nd run: res[1] expected to be modified, CF, PF and SF set */
+ if ( (rc != X86EMUL_OKAY) ||
+ (regs.eip != (unsigned long)&instr[6]) ||
+ (regs.r9 != 0x0102030411223344UL) ||
+ (regs.rbx != 0x02030405) ||
+ ((regs.eflags & EFLAGS_MASK) !=
+ (X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_SF |
+ EFLAGS_ALWAYS_SET)) ||
+ (res[0] + 1) ||
+ (res[1] != 0x13253749) ||
+ (res[2] + 1) )
+ goto fail;
+ printf("okay\n");
+ }
+ else
+ printf("skipped\n");
+
emulops.write_segment = write_segment;
emulops.write_msr = write_msr;
@@ -185,6 +185,7 @@ void wrpkru(unsigned int val);
#define cpu_has_serialize cp.feat.serialize
#define cpu_has_avx_vnni (cp.feat.avx_vnni && xcr0_mask(6))
#define cpu_has_avx512_bf16 (cp.feat.avx512_bf16 && xcr0_mask(0xe6))
+#define cpu_has_cmpccxadd cp.feat.cmpccxadd
#define cpu_has_xgetbv1 (cpu_has_xsave && cp.xstate.xgetbv1)
@@ -170,6 +170,7 @@ extern struct cpuinfo_x86 boot_cpu_data;
/* CPUID level 0x00000007:1.eax */
#define cpu_has_avx_vnni boot_cpu_has(X86_FEATURE_AVX_VNNI)
#define cpu_has_avx512_bf16 boot_cpu_has(X86_FEATURE_AVX512_BF16)
+#define cpu_has_cmpccxadd boot_cpu_has(X86_FEATURE_CMPCCXADD)
/* Synthesized. */
#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
@@ -443,6 +443,7 @@ static const struct ext0f38_table {
[0xcf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
[0xdc ... 0xdf] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
+ [0xe0 ... 0xef] = { .to_mem = 1 },
[0xf0] = { .two_op = 1 },
[0xf1] = { .to_mem = 1, .two_op = 1 },
[0xf2 ... 0xf3] = {},
@@ -934,6 +935,8 @@ decode_0f38(struct x86_emulate_state *s,
ctxt->opcode |= MASK_INSR(s->vex.pfx, X86EMUL_OPC_PFX_MASK);
break;
+ case X86EMUL_OPC_VEX_66(0, 0xe0)
+ ... X86EMUL_OPC_VEX_66(0, 0xef): /* cmp<cc>xadd */
case X86EMUL_OPC_VEX(0, 0xf2): /* andn */
case X86EMUL_OPC_VEX(0, 0xf3): /* Grp 17 */
case X86EMUL_OPC_VEX(0, 0xf5): /* bzhi */
@@ -265,6 +265,7 @@ struct x86_emulate_state {
rmw_btc,
rmw_btr,
rmw_bts,
+ rmw_cmpccxadd,
rmw_dec,
rmw_inc,
rmw_neg,
@@ -322,6 +323,8 @@ struct x86_emulate_state {
unsigned long ip;
+ struct stub_exn *stub_exn;
+
#ifndef NDEBUG
/*
* Track caller of x86_decode_insn() to spot missing as well as
@@ -593,6 +596,7 @@ amd_like(const struct x86_emulate_ctxt *
#define vcpu_has_tsxldtrk() (ctxt->cpuid->feat.tsxldtrk)
#define vcpu_has_avx_vnni() (ctxt->cpuid->feat.avx_vnni)
#define vcpu_has_avx512_bf16() (ctxt->cpuid->feat.avx512_bf16)
+#define vcpu_has_cmpccxadd() (ctxt->cpuid->feat.cmpccxadd)
#define vcpu_has_lkgs() (ctxt->cpuid->feat.lkgs)
#define vcpu_has_wrmsrns() (ctxt->cpuid->feat.wrmsrns)
@@ -6881,6 +6881,15 @@ x86_emulate(
#endif /* !X86EMUL_NO_SIMD */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0xe0)
+ ... X86EMUL_OPC_VEX_66(0x0f38, 0xef): /* cmp<cc>xadd r,r,m */
+ generate_exception_if(!mode_64bit() || dst.type != OP_MEM || vex.l,
+ EXC_UD); /* #UD outside 64-bit mode, for a non-memory destination, or with VEX.L set */
+ host_and_vcpu_must_have(cmpccxadd);
+ fail_if(!ops->rmw); /* the access is carried out via the ->rmw() hook, so one must be provided */
+ state->rmw = rmw_cmpccxadd; /* selects the rmw_cmpccxadd case in x86_emul_rmw() */
+ break;
+
case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
vcpu_must_have(movbe);
@@ -7942,14 +7951,20 @@ x86_emulate(
{
ea.val = src.val;
op_bytes = dst.bytes;
+ state->stub_exn = &stub_exn; /* x86_emul_rmw() accesses stub_exn through this pointer (needed by invoke_stub()) */
rc = ops->rmw(dst.mem.seg, dst.mem.off, dst.bytes, &_regs.eflags,
state, ctxt);
+#ifdef __XEN__
+ if ( rc == X86EMUL_stub_failure ) /* x86_emul_rmw() returns this from its emulation_stub_failure label */
+ goto emulation_stub_failure;
+#endif
if ( rc != X86EMUL_OKAY )
goto done;
/* Some operations require a register to be written. */
switch ( state->rmw )
{
+ case rmw_cmpccxadd:
case rmw_xchg:
case rmw_xadd:
switch ( dst.bytes )
@@ -8224,6 +8239,7 @@ int x86_emul_rmw(
uint32_t *eflags,
struct x86_emulate_state *state,
struct x86_emulate_ctxt *ctxt)
+#define stub_exn (*state->stub_exn) /* for invoke_stub() */
{
unsigned long *dst = ptr;
@@ -8289,6 +8305,37 @@ int x86_emul_rmw(
#undef BINOP
#undef SHIFT
+#ifdef __x86_64__
+ case rmw_cmpccxadd: /* build a stub holding the insn re-encoded onto fixed registers, then invoke it */
+ {
+ struct x86_emulate_stub stub = {};
+ uint8_t *buf = get_stub(stub);
+ typeof(state->vex) *pvex = container_of(buf + 1, typeof(state->vex),
+ raw[0]); /* overlays the VEX fields onto the stub bytes following buf[0] */
+ unsigned long dummy; /* scratch operand ([tmp]) for _POST_EFLAGS() */
+
+ buf[0] = 0xc4; /* 3-byte VEX prefix */
+ *pvex = state->vex; /* start from the VEX fields of the insn being emulated ... */
+ pvex->b = 1; /* ... then override register selection; raw (inverted) encoding, cf. 0xf meaning rAX below */
+ pvex->r = 1;
+ pvex->reg = 0xf; /* rAX */
+ buf[3] = ctxt->opcode; /* truncated to the opcode byte (0xe0...0xef for this case) */
+ buf[4] = 0x11; /* reg=rDX r/m=(%RCX) */
+ buf[5] = 0xc3; /* ret */
+
+ *eflags &= ~EFLAGS_MASK; /* presumably re-populated from the stub's resulting flags by _POST_EFLAGS() */
+ invoke_stub("",
+ _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
+ "+m" (*dst), "+d" (state->ea.val), /* memory operand; rDX is in/out via ea.val */
+ [tmp] "=&r" (dummy), [eflags] "+g" (*eflags)
+ : "a" (*decode_vex_gpr(state->vex.reg, ctxt->regs, ctxt)), /* value of the register named by the original VEX.vvvv, passed in rAX */
+ "c" (dst), [mask] "i" (EFLAGS_MASK)); /* rCX = address of the memory operand */
+
+ put_stub(stub);
+ break;
+ }
+#endif
+
case rmw_not:
switch ( state->op_bytes )
{
@@ -8384,7 +8431,13 @@ int x86_emul_rmw(
#undef JCXZ
return X86EMUL_OKAY;
+
+#if defined(__XEN__) && defined(__x86_64__)
+ emulation_stub_failure:
+ return X86EMUL_stub_failure;
+#endif
}
+#undef stub_exn
static void __init __maybe_unused build_assertions(void)
{
@@ -278,6 +278,7 @@ XEN_CPUFEATURE(SSBD, 9*32+31) /
/* Intel-defined CPU features, CPUID level 0x00000007:1.eax, word 10 */
XEN_CPUFEATURE(AVX_VNNI, 10*32+ 4) /*A AVX-VNNI Instructions */
XEN_CPUFEATURE(AVX512_BF16, 10*32+ 5) /*A AVX512 BFloat16 Instructions */
+XEN_CPUFEATURE(CMPCCXADD, 10*32+ 7) /*A CMPccXADD Instructions */
XEN_CPUFEATURE(FZRM, 10*32+10) /*A Fast Zero-length REP MOVSB */
XEN_CPUFEATURE(FSRS, 10*32+11) /*A Fast Short REP STOSB */
XEN_CPUFEATURE(FSRCS, 10*32+12) /*A Fast Short REP CMPSB/SCASB */
Unconditionally wire CMPccXADD emulation through the ->rmw() hook. Since x86_emul_rmw() now wants to construct and invoke a stub, make stub_exn available to it via a new field in the emulator state structure. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- # SDE: -grr or -srf