diff mbox series

[6/6] x86emul: support MOVDIR{I,64B} insns

Message ID 667bafda-b811-9864-9ad3-95447a7fb62f@suse.com (mailing list archive)
State Superseded
Headers show
Series x86emul: further work | expand

Commit Message

Jan Beulich July 1, 2019, 11:58 a.m. UTC
Note that the ISA extensions document revision 035 doesn't specify
exception behavior for ModRM.mod != 0b11; assuming #UD here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

Comments

Andrew Cooper Aug. 27, 2019, 4:04 p.m. UTC | #1
On 01/07/2019 12:58, Jan Beulich wrote:
> Note that the ISA extensions document revision 035 doesn't specify
> exception behavior for ModRM.mod != 0b11; assuming #UD here.

This has moved into the main SDM now.  These instructions don't make
sense with reg/reg encodings, so I expect that encoding space will be
reused as a new group at some point in the future.

>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -548,6 +548,8 @@ static const struct ext0f38_table {
>       [0xf1] = { .to_mem = 1, .two_op = 1 },
>       [0xf2 ... 0xf3] = {},
>       [0xf5 ... 0xf7] = {},
> +    [0xf8] = { .simd_size = simd_other },
> +    [0xf9] = { .to_mem = 1 },
>   };
>   
>   /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
> @@ -1902,6 +1904,8 @@ in_protmode(
>   #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
>   #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
>   #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
> +#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
> +#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
>   #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
>   #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
>   
> @@ -2693,10 +2697,12 @@ x86_decode_0f38(
>       {
>       case 0x00 ... 0xef:
>       case 0xf2 ... 0xf5:
> -    case 0xf7 ... 0xff:
> +    case 0xf7 ... 0xf8:
> +    case 0xfa ... 0xff:
>           op_bytes = 0;
>           /* fall through */
>       case 0xf6: /* adcx / adox */
> +    case 0xf9: /* movdiri */
>           ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
>           break;
>   
> @@ -9896,6 +9902,32 @@ x86_emulate(
>                               : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
>           break;
>   
> +    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
> +        vcpu_must_have(movdir64b);
> +        generate_exception_if(ea.type != OP_MEM, EXC_UD);
> +        src.val = truncate_ea(*dst.reg);
> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
> +                              EXC_GP, 0);
> +        /* Ignore the non-temporal behavior for now. */
> +        fail_if(!ops->write);
> +        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
> +        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
> +                             ctxt)) != X86EMUL_OKAY ||
> +             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
> +                              ctxt)) != X86EMUL_OKAY )
> +            goto done;
> +        state->simd_size = simd_none;
> +        sfence = true;
> +        break;
> +
> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
> +        vcpu_must_have(movdiri);
> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
> +        /* Ignore the non-temporal behavior for now. */
> +        dst.val = src.val;
> +        sfence = true;
> +        break;

I'm not certain this gives the required atomicity.  AFAICT, it degrades
into ops->write(), which can end up with bytewise writes.

I think we need to map the destination and issue an explicit mov
instruction.

> --- a/tools/tests/x86_emulator/x86-emulate.c
> +++ b/tools/tests/x86_emulator/x86-emulate.c
> @@ -76,6 +76,8 @@ bool emul_test_init(void)
>       cp.feat.adx = true;
>       cp.feat.avx512pf = cp.feat.avx512f;
>       cp.feat.rdpid = true;
> +    cp.feat.movdiri = true;
> +    cp.feat.movdir64b = true;
>       cp.extd.clzero = true;
>   
>       if ( cpu_has_xsave )
> @@ -137,15 +139,15 @@ int emul_test_cpuid(
>           res->c |= 1U << 22;
>   
>       /*
> -     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
> -     * insns, so we can always run the respective tests.
> +     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor the S/G
> +     * prefetch insns, so we can always run the respective tests.
>        */
>       if ( leaf == 7 && subleaf == 0 )
>       {
>           res->b |= (1U << 10) | (1U << 19);
>           if ( res->b & (1U << 16) )
>               res->b |= 1U << 26;
> -        res->c |= 1U << 22;
> +        res->c |= (1U << 22) | (1U << 27) | (1U << 28);

I've just noticed, but we shouldn't be having both the named variables
and these unnamed ones.  Is their presence a rebasing issue around
patches into the test suite?

~Andrew
Jan Beulich Aug. 28, 2019, 6:26 a.m. UTC | #2
On 27.08.2019 18:04, Andrew Cooper wrote:
> On 01/07/2019 12:58, Jan Beulich wrote:
>> @@ -9896,6 +9902,32 @@ x86_emulate(
>>                                : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
>>            break;
>>    
>> +    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
>> +        vcpu_must_have(movdir64b);
>> +        generate_exception_if(ea.type != OP_MEM, EXC_UD);
>> +        src.val = truncate_ea(*dst.reg);
>> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
>> +                              EXC_GP, 0);
>> +        /* Ignore the non-temporal behavior for now. */
>> +        fail_if(!ops->write);
>> +        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
>> +        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
>> +                             ctxt)) != X86EMUL_OKAY ||
>> +             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
>> +                              ctxt)) != X86EMUL_OKAY )
>> +            goto done;
>> +        state->simd_size = simd_none;
>> +        sfence = true;
>> +        break;
>> +
>> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
>> +        vcpu_must_have(movdiri);
>> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
>> +        /* Ignore the non-temporal behavior for now. */
>> +        dst.val = src.val;
>> +        sfence = true;
>> +        break;
> 
> I'm not certain this gives the required atomicity.  AFAICT, it degrades
> into ops->write(), which can end up with bytewise writes.
> 
> I think we need to map the destination and issue an explicit mov
> instruction.

I don't think so, no - plain MOV has the same property (in particular
when not going through the cache), and also uses the ->write() hook.
It's the hook function that needs to behave properly for all of this
to be correct.

>> --- a/tools/tests/x86_emulator/x86-emulate.c
>> +++ b/tools/tests/x86_emulator/x86-emulate.c
>> @@ -76,6 +76,8 @@ bool emul_test_init(void)
>>        cp.feat.adx = true;
>>        cp.feat.avx512pf = cp.feat.avx512f;
>>        cp.feat.rdpid = true;
>> +    cp.feat.movdiri = true;
>> +    cp.feat.movdir64b = true;
>>        cp.extd.clzero = true;
>>    
>>        if ( cpu_has_xsave )
>> @@ -137,15 +139,15 @@ int emul_test_cpuid(
>>            res->c |= 1U << 22;
>>    
>>        /*
>> -     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
>> -     * insns, so we can always run the respective tests.
>> +     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor the S/G
>> +     * prefetch insns, so we can always run the respective tests.
>>         */
>>        if ( leaf == 7 && subleaf == 0 )
>>        {
>>            res->b |= (1U << 10) | (1U << 19);
>>            if ( res->b & (1U << 16) )
>>                res->b |= 1U << 26;
>> -        res->c |= 1U << 22;
>> +        res->c |= (1U << 22) | (1U << 27) | (1U << 28);
> 
> I've just noticed, but we shouldn't be having both the named variables
> and these unnamed ones.  Is their presence a rebasing issue around
> patches into the test suite?

emul_test_init() gained its current shape from fd35f32b4b, while
emul_test_cpuid() had been left untouched at that point. So I guess
it's more like a forgotten piece of conversion work. I'm unsure
though whether such a conversion is actually a good idea, since aiui
it would mean cloning at least guest_cpuid()'s first switch() into
this function, which is quite a bit more code than there is right
now. Perhaps (the common part of) that switch() could be morphed
into a library function ...

Jan
Andrew Cooper Aug. 28, 2019, 11:51 a.m. UTC | #3
On 28/08/2019 07:26, Jan Beulich wrote:
> On 27.08.2019 18:04, Andrew Cooper wrote:
>> On 01/07/2019 12:58, Jan Beulich wrote:
>>> @@ -9896,6 +9902,32 @@ x86_emulate(
>>>                                : "0" ((uint32_t)src.val), "rm"
>>> (_regs.edx) );
>>>            break;
>>>    +    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
>>> +        vcpu_must_have(movdir64b);
>>> +        generate_exception_if(ea.type != OP_MEM, EXC_UD);
>>> +        src.val = truncate_ea(*dst.reg);
>>> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64,
>>> ctxt, ops),
>>> +                              EXC_GP, 0);
>>> +        /* Ignore the non-temporal behavior for now. */
>>> +        fail_if(!ops->write);
>>> +        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
>>> +        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
>>> +                             ctxt)) != X86EMUL_OKAY ||
>>> +             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
>>> +                              ctxt)) != X86EMUL_OKAY )
>>> +            goto done;
>>> +        state->simd_size = simd_none;
>>> +        sfence = true;
>>> +        break;
>>> +
>>> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
>>> +        vcpu_must_have(movdiri);
>>> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
>>> +        /* Ignore the non-temporal behavior for now. */
>>> +        dst.val = src.val;
>>> +        sfence = true;
>>> +        break;
>>
>> I'm not certain this gives the required atomicity.  AFAICT, it degrades
>> into ops->write(), which can end up with bytewise writes.
>>
>> I think we need to map the destination and issue an explicit mov
>> instruction.
>
> I don't think so, no - plain MOV has the same property (in particular
> when not going through the cache), and also uses the ->write() hook.
> It's the hook function that needs to behave properly for all of this
> to be correct.

It only occurred to me after sending this email that plain MOV was
broken as well.

>
>>> --- a/tools/tests/x86_emulator/x86-emulate.c
>>> +++ b/tools/tests/x86_emulator/x86-emulate.c
>>> @@ -76,6 +76,8 @@ bool emul_test_init(void)
>>>        cp.feat.adx = true;
>>>        cp.feat.avx512pf = cp.feat.avx512f;
>>>        cp.feat.rdpid = true;
>>> +    cp.feat.movdiri = true;
>>> +    cp.feat.movdir64b = true;
>>>        cp.extd.clzero = true;
>>>           if ( cpu_has_xsave )
>>> @@ -137,15 +139,15 @@ int emul_test_cpuid(
>>>            res->c |= 1U << 22;
>>>           /*
>>> -     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G
>>> prefetch
>>> -     * insns, so we can always run the respective tests.
>>> +     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor
>>> the S/G
>>> +     * prefetch insns, so we can always run the respective tests.
>>>         */
>>>        if ( leaf == 7 && subleaf == 0 )
>>>        {
>>>            res->b |= (1U << 10) | (1U << 19);
>>>            if ( res->b & (1U << 16) )
>>>                res->b |= 1U << 26;
>>> -        res->c |= 1U << 22;
>>> +        res->c |= (1U << 22) | (1U << 27) | (1U << 28);
>>
>> I've just noticed, but we shouldn't be having both the named variables
>> and these unnamed ones.  Is their presence a rebasing issue around
>> patches into the test suite?
>
> emul_test_init() gained its current shape from fd35f32b4b, while
> emul_test_cpuid() had been left untouched at that point. So I guess
> it's more like a forgotten piece of conversion work. I'm unsure
> though whether such a conversion is actually a good idea, since aiui
> it would mean cloning at least guest_cpuid()'s first switch() into
> this function, which is quite a bit more code than there is right
> now. Perhaps (the common part of) that switch() could be morphed
> into a library function ...

I'll throw it onto the big pile of CPUID work.  It is not trivial to
library-fy because of the Xen/Viridian leaf handling.

~Andrew
Jan Beulich Aug. 28, 2019, 12:19 p.m. UTC | #4
On 28.08.2019 13:51, Andrew Cooper wrote:
> On 28/08/2019 07:26, Jan Beulich wrote:
>> On 27.08.2019 18:04, Andrew Cooper wrote:
>>> On 01/07/2019 12:58, Jan Beulich wrote:
>>>> @@ -9896,6 +9902,32 @@ x86_emulate(
>>>>                                 : "0" ((uint32_t)src.val), "rm"
>>>> (_regs.edx) );
>>>>             break;
>>>>     +    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
>>>> +        vcpu_must_have(movdir64b);
>>>> +        generate_exception_if(ea.type != OP_MEM, EXC_UD);
>>>> +        src.val = truncate_ea(*dst.reg);
>>>> +        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64,
>>>> ctxt, ops),
>>>> +                              EXC_GP, 0);
>>>> +        /* Ignore the non-temporal behavior for now. */
>>>> +        fail_if(!ops->write);
>>>> +        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
>>>> +        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
>>>> +                             ctxt)) != X86EMUL_OKAY ||
>>>> +             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
>>>> +                              ctxt)) != X86EMUL_OKAY )
>>>> +            goto done;
>>>> +        state->simd_size = simd_none;
>>>> +        sfence = true;
>>>> +        break;
>>>> +
>>>> +    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
>>>> +        vcpu_must_have(movdiri);
>>>> +        generate_exception_if(dst.type != OP_MEM, EXC_UD);
>>>> +        /* Ignore the non-temporal behavior for now. */
>>>> +        dst.val = src.val;
>>>> +        sfence = true;
>>>> +        break;
>>>
>>> I'm not certain this gives the required atomicity.  AFAICT, it degrades
>>> into ops->write(), which can end up with bytewise writes.
>>>
>>> I think we need to map the destination and issue an explicit mov
>>> instruction.
>>
>> I don't think so, no - plain MOV has the same property (in particular
>> when not going through the cache), and also uses the ->write() hook.
>> It's the hook function that needs to behave properly for all of this
>> to be correct.
> 
> It only occurred to me after sending this email that plain MOV was
> broken as well.

Well, they're both in the same state, but it shouldn't be the core
emulator's job to establish a mapping. In fact hvmemul_write() already
calls hvmemul_map_linear_addr(). What's sub-optimal is its reliance on
memcpy()'s implementation (when there's a valid GFN->MFN mapping).

Jan
diff mbox series

Patch

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -548,6 +548,8 @@  static const struct ext0f38_table {
      [0xf1] = { .to_mem = 1, .two_op = 1 },
      [0xf2 ... 0xf3] = {},
      [0xf5 ... 0xf7] = {},
+    [0xf8] = { .simd_size = simd_other },
+    [0xf9] = { .to_mem = 1 },
  };
  
  /* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
@@ -1902,6 +1904,8 @@  in_protmode(
  #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
  #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
  #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
+#define vcpu_has_movdiri()     (ctxt->cpuid->feat.movdiri)
+#define vcpu_has_movdir64b()   (ctxt->cpuid->feat.movdir64b)
  #define vcpu_has_avx512_4vnniw() (ctxt->cpuid->feat.avx512_4vnniw)
  #define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
  
@@ -2693,10 +2697,12 @@  x86_decode_0f38(
      {
      case 0x00 ... 0xef:
      case 0xf2 ... 0xf5:
-    case 0xf7 ... 0xff:
+    case 0xf7 ... 0xf8:
+    case 0xfa ... 0xff:
          op_bytes = 0;
          /* fall through */
      case 0xf6: /* adcx / adox */
+    case 0xf9: /* movdiri */
          ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
          break;
  
@@ -9896,6 +9902,32 @@  x86_emulate(
                              : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
          break;
  
+    case X86EMUL_OPC_66(0x0f38, 0xf8): /* movdir64b r,m512 */
+        vcpu_must_have(movdir64b);
+        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+        src.val = truncate_ea(*dst.reg);
+        generate_exception_if(!is_aligned(x86_seg_es, src.val, 64, ctxt, ops),
+                              EXC_GP, 0);
+        /* Ignore the non-temporal behavior for now. */
+        fail_if(!ops->write);
+        BUILD_BUG_ON(sizeof(*mmvalp) < 64);
+        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 64,
+                             ctxt)) != X86EMUL_OKAY ||
+             (rc = ops->write(x86_seg_es, src.val, mmvalp, 64,
+                              ctxt)) != X86EMUL_OKAY )
+            goto done;
+        state->simd_size = simd_none;
+        sfence = true;
+        break;
+
+    case X86EMUL_OPC(0x0f38, 0xf9): /* movdiri mem,r */
+        vcpu_must_have(movdiri);
+        generate_exception_if(dst.type != OP_MEM, EXC_UD);
+        /* Ignore the non-temporal behavior for now. */
+        dst.val = src.val;
+        sfence = true;
+        break;
+
      case X86EMUL_OPC_VEX_66(0x0f3a, 0x00): /* vpermq $imm8,ymm/m256,ymm */
      case X86EMUL_OPC_VEX_66(0x0f3a, 0x01): /* vpermpd $imm8,ymm/m256,ymm */
          generate_exception_if(!vex.l || !vex.w, EXC_UD);
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -237,6 +237,8 @@  XEN_CPUFEATURE(AVX512_BITALG, 6*32+12) /
  XEN_CPUFEATURE(AVX512_VPOPCNTDQ, 6*32+14) /*A  POPCNT for vectors of DW/QW */
  XEN_CPUFEATURE(RDPID,         6*32+22) /*A  RDPID instruction */
  XEN_CPUFEATURE(CLDEMOTE,      6*32+25) /*A  CLDEMOTE instruction */
+XEN_CPUFEATURE(MOVDIRI,       6*32+27) /*A  MOVDIRI instruction */
+XEN_CPUFEATURE(MOVDIR64B,     6*32+28) /*A  MOVDIR64B instruction */
  
  /* AMD-defined CPU features, CPUID level 0x80000007.edx, word 7 */
  XEN_CPUFEATURE(ITSC,          7*32+ 8) /*   Invariant TSC */
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -2196,6 +2196,36 @@  int main(int argc, char **argv)
          goto fail;
      printf("okay\n");
  
+    printf("%-40s", "Testing movdiri %edx,(%ecx)...");
+    instr[0] = 0x0f; instr[1] = 0x38; instr[2] = 0xf9; instr[3] = 0x11;
+    regs.eip = (unsigned long)&instr[0];
+    regs.ecx = (unsigned long)memset(res, -1, 16);
+    regs.edx = 0x44332211;
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eip != (unsigned long)&instr[4]) ||
+         res[0] != 0x44332211 || ~res[1] )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing movdir64b 144(%edx),%ecx...");
+    instr[0] = 0x66; instr[1] = 0x0f; instr[2] = 0x38; instr[3] = 0xf8;
+    instr[4] = 0x8a; instr[5] = 0x90; instr[8] = instr[7] = instr[6] = 0;
+    regs.eip = (unsigned long)&instr[0];
+    for ( i = 0; i < 64; ++i )
+        res[i] = i - 20;
+    regs.edx = (unsigned long)res;
+    regs.ecx = (unsigned long)(res + 16);
+    rc = x86_emulate(&ctxt, &emulops);
+    if ( (rc != X86EMUL_OKAY) ||
+         (regs.eip != (unsigned long)&instr[9]) ||
+         res[15] != -5 || res[32] != 12 )
+        goto fail;
+    for ( i = 16; i < 32; ++i )
+        if ( res[i] != i )
+            goto fail;
+    printf("okay\n");
+
      printf("%-40s", "Testing movq %mm3,(%ecx)...");
      if ( stack_exec && cpu_has_mmx )
      {
--- a/tools/tests/x86_emulator/x86-emulate.c
+++ b/tools/tests/x86_emulator/x86-emulate.c
@@ -76,6 +76,8 @@  bool emul_test_init(void)
      cp.feat.adx = true;
      cp.feat.avx512pf = cp.feat.avx512f;
      cp.feat.rdpid = true;
+    cp.feat.movdiri = true;
+    cp.feat.movdir64b = true;
      cp.extd.clzero = true;
  
      if ( cpu_has_xsave )
@@ -137,15 +139,15 @@  int emul_test_cpuid(
          res->c |= 1U << 22;
  
      /*
-     * The emulator doesn't itself use ADCX/ADOX/RDPID nor the S/G prefetch
-     * insns, so we can always run the respective tests.
+     * The emulator doesn't itself use ADCX/ADOX/RDPID/MOVDIR* nor the S/G
+     * prefetch insns, so we can always run the respective tests.
       */
      if ( leaf == 7 && subleaf == 0 )
      {
          res->b |= (1U << 10) | (1U << 19);
          if ( res->b & (1U << 16) )
              res->b |= 1U << 26;
-        res->c |= 1U << 22;
+        res->c |= (1U << 22) | (1U << 27) | (1U << 28);
      }
  
      /*