@@ -81,6 +81,7 @@ enum esz {
ESZ_w,
ESZ_bw,
ESZ_fp16,
+#define ESZ_bf16 ESZ_fp16
};
#ifndef __i386__
@@ -711,6 +712,16 @@ static const struct test vpclmulqdq_all[
INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
+static const struct test avx10_2_all[] = {
+ INSN(comsbf16, 66, map5, 2f, el, bf16, el),
+ INSN(comxsd, f3, 0f, 2f, el, q, el),
+ INSN(comxsh, f2, map5, 2f, el, fp16, el),
+ INSN(comxss, f2, 0f, 2f, el, d, el),
+ INSN(ucomxsd, f3, 0f, 2e, el, q, el),
+ INSN(ucomxsh, f2, map5, 2e, el, fp16, el),
+ INSN(ucomxss, f2, 0f, 2e, el, d, el),
+};
+
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
static const unsigned char vl_no128[] = { VL_512, VL_256 };
@@ -1130,5 +1141,8 @@ void evex_disp8_test(void *instr, struct
RUN(vpclmulqdq, all);
#undef RUN
}
+
+ run(cpu_has_avx10_2, avx10_2, all);
+
#undef run
}
@@ -1682,8 +1682,12 @@ static const struct evex {
{ { 0x2d }, 2, T, R, pfx_f2, Wn, LIG }, /* vcvtsd2si */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomiss */
{ { 0x2e }, 2, T, R, pfx_66, W1, LIG }, /* vucomisd */
+ { { 0x2e }, 2, T, R, pfx_f3, W1, LIG }, /* vucomxsd */
+ { { 0x2e }, 2, T, R, pfx_f2, W0, LIG }, /* vucomxss */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomiss */
{ { 0x2f }, 2, T, R, pfx_66, W1, LIG }, /* vcomisd */
+ { { 0x2f }, 2, T, R, pfx_f3, W1, LIG }, /* vcomxsd */
+ { { 0x2f }, 2, T, R, pfx_f2, W0, LIG }, /* vcomxss */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtps */
{ { 0x51 }, 2, T, R, pfx_66, W1, Ln }, /* vsqrtpd */
{ { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtss */
@@ -2100,7 +2104,10 @@ static const struct evex {
{ { 0x2c }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvttsh2si */
{ { 0x2d }, 2, T, R, pfx_f3, Wn, LIG }, /* vcvtsh2si */
{ { 0x2e }, 2, T, R, pfx_no, W0, LIG }, /* vucomish */
+ { { 0x2e }, 2, T, R, pfx_f2, W0, LIG }, /* vucomxsh */
{ { 0x2f }, 2, T, R, pfx_no, W0, LIG }, /* vcomish */
+ { { 0x2f }, 2, T, R, pfx_66, W0, LIG }, /* vcomsbf16 */
+ { { 0x2f }, 2, T, R, pfx_f2, W0, LIG }, /* vcomxsh */
{ { 0x51 }, 2, T, R, pfx_no, W0, Ln }, /* vsqrtph */
{ { 0x51 }, 2, T, R, pfx_f3, W0, LIG }, /* vsqrtsh */
{ { 0x58 }, 2, T, R, pfx_no, W0, Ln }, /* vaddph */
@@ -213,6 +213,8 @@ void wrpkru(unsigned int val);
(cpu_policy.avx10.vsz256 || \
cpu_policy.avx10.vsz512))
#define cpu_has_avx10_1_512 (cpu_has_avx10_1 && cpu_policy.avx10.vsz512)
+#define cpu_has_avx10_2 (cpu_policy.avx10.version >= 2 && \
+ xcr0_mask(0xe6))
#define cpu_has_xgetbv1 (cpu_has_xsave && cpu_policy.xstate.xgetbv1)
@@ -1521,9 +1521,8 @@ int x86emul_decode(struct x86_emulate_st
s->fp16 = true;
break;
- case 0x2e: case 0x2f: /* v{,u}comish */
- if ( !s->evex.pfx )
- s->fp16 = true;
+ case 0x2e: case 0x2f: /* v{,u}com{i,x}sh, vcomsbf16 */
+ s->fp16 = true;
s->simd_size = simd_none;
break;
@@ -304,7 +304,7 @@ struct x86_emulate_state {
bool lock_prefix;
bool not_64bit; /* Instruction not available in 64bit. */
bool fpu_ctrl; /* Instruction is an FPU control one. */
- bool fp16; /* Instruction has half-precision FP source operand. */
+ bool fp16; /* Instruction has half-precision FP or BF16 source. */
opcode_desc_t desc;
union vex vex;
union evex evex;
@@ -596,8 +596,8 @@ amd_like(const struct x86_emulate_ctxt *
#define vcpu_has_avx10(minor) (ctxt->cpuid->avx10.version >= (minor))
-#define vcpu_must_have(feat) \
- generate_exception_if(!vcpu_has_##feat(), X86_EXC_UD)
+#define vcpu_must_have(feat, ...) \
+ generate_exception_if(!vcpu_has_##feat(__VA_ARGS__), X86_EXC_UD)
#ifdef __XEN__
/*
@@ -3813,7 +3813,6 @@ x86_emulate(
case X86EMUL_OPC_EVEX(5, 0x2e): /* vucomish xmm/m16,xmm */
case X86EMUL_OPC_EVEX(5, 0x2f): /* vcomish xmm/m16,xmm */
visa_check(_fp16);
- generate_exception_if(evex.w, X86_EXC_UD);
/* fall through */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
CASE_SIMD_PACKED_FP(_EVEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
@@ -3821,6 +3820,7 @@ x86_emulate(
evex.w != evex.pfx),
X86_EXC_UD);
visa_check(f);
+ vcomi_evex:
if ( !evex.brs )
avx512_vlen_check(true);
else
@@ -3831,6 +3831,17 @@ x86_emulate(
op_bytes = 2 << (!state->fp16 + evex.w);
goto vcomi;
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2e): /* vucomxs{s,d} xmm/mem,xmm */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2f): /* vcomxs{s,d} xmm/mem,xmm */
+ case X86EMUL_OPC_EVEX_F2(5, 0x2e): /* vucomxsh xmm/m16,xmm */
+ case X86EMUL_OPC_EVEX_66(5, 0x2f): /* vcomsbf16 xmm/m16,xmm */
+ case X86EMUL_OPC_EVEX_F2(5, 0x2f): /* vcomxsh xmm/m16,xmm */
+ generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
+ evex.w != !(evex.pfx & 1)),
+ X86_EXC_UD);
+ vcpu_must_have(avx10, 2);
+ goto vcomi_evex;
+
#endif
    case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
Simply clone code from their V{,U}COMIS{S,D,H} counterparts. While there drop a redundant EVEX.W check from V{,U}COMISH handling. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- This still follows what spec version 001 says wrt embedded prefixes. They were swapped to match other insns, yet so far no SDE is available to run the test harness there with the flipped encoding. --- SDE: ??? --- v3: New.
Simply clone code from their V{,U}COMIS{S,D,H} counterparts. While there drop a redundant EVEX.W check from V{,U}COMISH handling. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- This still follows what spec version 001 says wrt embedded prefixed. They were swapped to match other insns, yet so far no SDE is available to run the test harness there with the flipped encoding. --- SDE: ??? --- v3: New.