diff mbox series

[v3] i386/cpu_dump: support AVX512 ZMM regs dump

Message ID 1616572804-7898-1-git-send-email-robert.hu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [v3] i386/cpu_dump: support AVX512 ZMM regs dump | expand

Commit Message

Robert Hoo March 24, 2021, 8 a.m. UTC
Since commit fa4518741e (target-i386: Rename struct XMMReg to ZMMReg),
CPUX86State.xmm_regs[] has already been extended to 512bit to support
AVX512.
Also, other QEMU-level support for AVX512 registers has been in place
for years.
However, x86_cpu_dump_state() still dumps only the XMM registers,
regardless of whether YMM/ZMM is enabled.
This patch fills that gap by dumping XMM/YMM/ZMM as appropriate.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
---
Changelog:
v3: fix some coding style issue.
v2: dump XMM/YMM/ZMM according to XSAVE state-components enablement.

 target/i386/cpu-dump.c | 55 ++++++++++++++++++++++++++++++++++++--------------
 target/i386/cpu.h      | 11 ++++++++++
 2 files changed, 51 insertions(+), 15 deletions(-)

Comments

Richard Henderson March 24, 2021, 1:44 p.m. UTC | #1
On 3/24/21 2:00 AM, Robert Hoo wrote:
> +        if ((env->xcr0 & XFEATURE_AVX512) == XFEATURE_AVX512) {
> +            /* XSAVE enabled AVX512 */
> +            nb = (env->hflags & HF_CS64_MASK) ? 32 : 8;
> +            for (i = 0; i < nb; i++) {
> +                qemu_fprintf(f, "ZMM%02d=0x%016lx %016lx %016lx %016lx %016lx "
> +                                "%016lx %016lx %016lx\n",
> +                             i,
> +                             env->xmm_regs[i].ZMM_Q(7),
> +                             env->xmm_regs[i].ZMM_Q(6),
> +                             env->xmm_regs[i].ZMM_Q(5),
> +                             env->xmm_regs[i].ZMM_Q(4),
> +                             env->xmm_regs[i].ZMM_Q(3),
> +                             env->xmm_regs[i].ZMM_Q(2),
> +                             env->xmm_regs[i].ZMM_Q(1),
> +                             env->xmm_regs[i].ZMM_Q(0));
> +            }

Dump opmask regs?

> +        } else if (env->xcr0 & XFEATURE_AVX) {

This is normally a 2-bit test.

> +            /* XSAVE enabled AVX */
> +            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
> +            for (i = 0; i < nb; i++) {
> +                qemu_fprintf(f, "YMM%02d=0x%016lx %016lx %016lx %016lx\n",
> +                             i,
> +                             env->xmm_regs[i].ZMM_Q(3),
> +                             env->xmm_regs[i].ZMM_Q(2),
> +                             env->xmm_regs[i].ZMM_Q(1),
> +                             env->xmm_regs[i].ZMM_Q(0));
> +            }
> +        } else { /* SSE and below cases */
> +            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
> +            for (i = 0; i < nb; i++) {
> +                qemu_fprintf(f, "XMM%02d=0x%016lx %016lx",
> +                             i,
> +                             env->xmm_regs[i].ZMM_Q(1),
> +                             env->xmm_regs[i].ZMM_Q(0));
> +                if ((i & 1) == 1)
> +                    qemu_fprintf(f, "\n");
> +                else
> +                    qemu_fprintf(f, " ");

I'd be tempted to merge that second printf into the first, with "%s" and (i & 1 
? "\n" : " ").  Otherwise you'll need to add braces to that IF to satisfy 
checkpatch.

> +#define XFEATURE_X87        (1UL << 0)
> +#define XFEATURE_SSE        (1UL << 1)
> +#define XFEATURE_AVX        (1UL << 2)
> +#define XFEATURE_AVX512_OPMASK          (1UL << 5)
> +#define XFEATURE_AVX512_ZMM_Hi256       (1UL << 6)
> +#define XFEATURE_AVX512_Hi16_ZMM        (1UL << 7)
> +#define XFEATURE_AVX512     (XFEATURE_AVX512_OPMASK | \
> +                             XFEATURE_AVX512_ZMM_Hi256 | \
> +                             XFEATURE_AVX512_Hi16_ZMM)

Except for the last, these already exist under the name XSTATE_*_MASK.

I think you can just as well declare local variables to hold the 3 bits for the 
avx512 test and the 2 bits for the avx test.


r~
Robert Hoo March 25, 2021, 3:15 a.m. UTC | #2
On Wed, 2021-03-24 at 07:44 -0600, Richard Henderson wrote:
> On 3/24/21 2:00 AM, Robert Hoo wrote:
> > +        if ((env->xcr0 & XFEATURE_AVX512) == XFEATURE_AVX512) {
> > +            /* XSAVE enabled AVX512 */
> > +            nb = (env->hflags & HF_CS64_MASK) ? 32 : 8;
> > +            for (i = 0; i < nb; i++) {
> > +                qemu_fprintf(f, "ZMM%02d=0x%016lx %016lx %016lx
> > %016lx %016lx "
> > +                                "%016lx %016lx %016lx\n",
> > +                             i,
> > +                             env->xmm_regs[i].ZMM_Q(7),
> > +                             env->xmm_regs[i].ZMM_Q(6),
> > +                             env->xmm_regs[i].ZMM_Q(5),
> > +                             env->xmm_regs[i].ZMM_Q(4),
> > +                             env->xmm_regs[i].ZMM_Q(3),
> > +                             env->xmm_regs[i].ZMM_Q(2),
> > +                             env->xmm_regs[i].ZMM_Q(1),
> > +                             env->xmm_regs[i].ZMM_Q(0));
> > +            }
> 
> Dump opmask regs?

OK
> 
> > +        } else if (env->xcr0 & XFEATURE_AVX) {
> 
> This is normally a 2-bit test.

I beg your pardon. What 2 bits?
> 
> > +            /* XSAVE enabled AVX */
> > +            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
> > +            for (i = 0; i < nb; i++) {
> > +                qemu_fprintf(f, "YMM%02d=0x%016lx %016lx %016lx
> > %016lx\n",
> > +                             i,
> > +                             env->xmm_regs[i].ZMM_Q(3),
> > +                             env->xmm_regs[i].ZMM_Q(2),
> > +                             env->xmm_regs[i].ZMM_Q(1),
> > +                             env->xmm_regs[i].ZMM_Q(0));
> > +            }
> > +        } else { /* SSE and below cases */
> > +            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
> > +            for (i = 0; i < nb; i++) {
> > +                qemu_fprintf(f, "XMM%02d=0x%016lx %016lx",
> > +                             i,
> > +                             env->xmm_regs[i].ZMM_Q(1),
> > +                             env->xmm_regs[i].ZMM_Q(0));
> > +                if ((i & 1) == 1)
> > +                    qemu_fprintf(f, "\n");
> > +                else
> > +                    qemu_fprintf(f, " ");
> 
> I'd be tempted to merge that second printf into the first, with "%s"
> and (i & 1 
> ? "\n" : " ").  Otherwise you'll need to add braces to that IF to
> satisfy 
> checkpatch.

Sure. I just retained previous code.
BTW, checkpatch didn't warn me on this. It escaped.:)
> 
> > +#define XFEATURE_X87        (1UL << 0)
> > +#define XFEATURE_SSE        (1UL << 1)
> > +#define XFEATURE_AVX        (1UL << 2)
> > +#define XFEATURE_AVX512_OPMASK          (1UL << 5)
> > +#define XFEATURE_AVX512_ZMM_Hi256       (1UL << 6)
> > +#define XFEATURE_AVX512_Hi16_ZMM        (1UL << 7)
> > +#define XFEATURE_AVX512     (XFEATURE_AVX512_OPMASK | \
> > +                             XFEATURE_AVX512_ZMM_Hi256 | \
> > +                             XFEATURE_AVX512_Hi16_ZMM)
> 
> Except for the last, these already exist under the name
> XSTATE_*_MASK.

Ah, my poor eyesight. They even exist in the same file. Thanks for
pointing that out.
> 
> I think you can just as well declare local variables to hold the 3
> bits for the 
> avx512 test and the 2 bits for the avx test.
> 
Sure.
> 
> r~
Richard Henderson March 25, 2021, 12:39 p.m. UTC | #3
On 3/24/21 9:15 PM, Robert Hoo wrote:
>>> +        } else if (env->xcr0 & XFEATURE_AVX) {
>>
>> This is normally a 2-bit test.
> 
> I beg your pardon. What 2 bits?

I forget the names, but isn't the usual test (xcr0 & 6) == 6?

> BTW, checkpatch didn't warn me on this. It escaped.:)

Heh.


r~
Robert Hoo March 26, 2021, 1:47 a.m. UTC | #4
On Thu, 2021-03-25 at 06:39 -0600, Richard Henderson wrote:
> On 3/24/21 9:15 PM, Robert Hoo wrote:
> > > > +        } else if (env->xcr0 & XFEATURE_AVX) {
> > > 
> > > This is normally a 2-bit test.
> > 
> > I beg your pardon. What 2 bits?
> 
> I forget the names, but isn't the usual test xcr0 & 6 == 6?

6 stands for the SSE state-component and the AVX state-component.
I'm not sure about this.
Can you remember where you saw this "xcr0 & 6 == 6" test? I can look
into that.
> 
> > BTW, checkpatch didn't warn me on this. It escaped.:)
> 
> Heh.
> 
> 
> r~
Richard Henderson March 26, 2021, 1:11 p.m. UTC | #5
On 3/25/21 7:47 PM, Robert Hoo wrote:
> On Thu, 2021-03-25 at 06:39 -0600, Richard Henderson wrote:
>> On 3/24/21 9:15 PM, Robert Hoo wrote:
>>>>> +        } else if (env->xcr0 & XFEATURE_AVX) {
>>>>
>>>> This is normally a 2-bit test.
>>>
>>> I beg your pardon. What 2 bits?
>>
>> I forget the names, but isn't the usual test xcr0 & 6 == 6?
> 
> 6 stands for SSE state-component and AVX state-component.
> I'm not sure about this.
> Can you remember where did you this "xcr0 & 6 == 6"? I can look into
> that.

Intel 64 and IA-32 Architectures Software Developer's Manual, Vol 1 Basic
Architecture, Section 14.3 Detection of AVX instructions.


r~
Robert Hoo March 26, 2021, 2:16 p.m. UTC | #6
On Fri, 2021-03-26 at 07:11 -0600, Richard Henderson wrote:
> On 3/25/21 7:47 PM, Robert Hoo wrote:
> > On Thu, 2021-03-25 at 06:39 -0600, Richard Henderson wrote:
> > > On 3/24/21 9:15 PM, Robert Hoo wrote:
> > > > > > +        } else if (env->xcr0 & XFEATURE_AVX) {
> > > > > 
> > > > > This is normally a 2-bit test.
> > > > 
> > > > I beg your pardon. What 2 bits?
> > > 
> > > I forget the names, but isn't the usual test xcr0 & 6 == 6?
> > 
> > 6 stands for SSE state-component and AVX state-component.
> > I'm not sure about this.
> > Can you remember where did you this "xcr0 & 6 == 6"? I can look
> > into
> > that.
> 
> IA-64 and IA32 Software developers manual, Vol 1 Basic Architecture,
> Section 
> 14.3 Detection of AVX instructions.

OK, thanks Richard. If we use the feature detection criteria here, then
the AVX512 case will also need XCR0[2:1]='11b'.
I'm going to send v4 soon.
> 
> 
> r~
diff mbox series

Patch

diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
index aac21f1..00fb56f 100644
--- a/target/i386/cpu-dump.c
+++ b/target/i386/cpu-dump.c
@@ -499,21 +499,46 @@  void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags)
             else
                 qemu_fprintf(f, " ");
         }
-        if (env->hflags & HF_CS64_MASK)
-            nb = 16;
-        else
-            nb = 8;
-        for(i=0;i<nb;i++) {
-            qemu_fprintf(f, "XMM%02d=%08x%08x%08x%08x",
-                         i,
-                         env->xmm_regs[i].ZMM_L(3),
-                         env->xmm_regs[i].ZMM_L(2),
-                         env->xmm_regs[i].ZMM_L(1),
-                         env->xmm_regs[i].ZMM_L(0));
-            if ((i & 1) == 1)
-                qemu_fprintf(f, "\n");
-            else
-                qemu_fprintf(f, " ");
+
+        if ((env->xcr0 & XFEATURE_AVX512) == XFEATURE_AVX512) {
+            /* XSAVE enabled AVX512 */
+            nb = (env->hflags & HF_CS64_MASK) ? 32 : 8;
+            for (i = 0; i < nb; i++) {
+                qemu_fprintf(f, "ZMM%02d=0x%016lx %016lx %016lx %016lx %016lx "
+                                "%016lx %016lx %016lx\n",
+                             i,
+                             env->xmm_regs[i].ZMM_Q(7),
+                             env->xmm_regs[i].ZMM_Q(6),
+                             env->xmm_regs[i].ZMM_Q(5),
+                             env->xmm_regs[i].ZMM_Q(4),
+                             env->xmm_regs[i].ZMM_Q(3),
+                             env->xmm_regs[i].ZMM_Q(2),
+                             env->xmm_regs[i].ZMM_Q(1),
+                             env->xmm_regs[i].ZMM_Q(0));
+            }
+        } else if (env->xcr0 & XFEATURE_AVX) {
+            /* XSAVE enabled AVX */
+            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
+            for (i = 0; i < nb; i++) {
+                qemu_fprintf(f, "YMM%02d=0x%016lx %016lx %016lx %016lx\n",
+                             i,
+                             env->xmm_regs[i].ZMM_Q(3),
+                             env->xmm_regs[i].ZMM_Q(2),
+                             env->xmm_regs[i].ZMM_Q(1),
+                             env->xmm_regs[i].ZMM_Q(0));
+            }
+        } else { /* SSE and below cases */
+            nb = env->hflags & HF_CS64_MASK ? 16 : 8;
+            for (i = 0; i < nb; i++) {
+                qemu_fprintf(f, "XMM%02d=0x%016lx %016lx",
+                             i,
+                             env->xmm_regs[i].ZMM_Q(1),
+                             env->xmm_regs[i].ZMM_Q(0));
+                if ((i & 1) == 1)
+                    qemu_fprintf(f, "\n");
+                else
+                    qemu_fprintf(f, " ");
+            }
         }
     }
     if (flags & CPU_DUMP_CODE) {
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 570f916..82f5d56 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -249,6 +249,17 @@  typedef enum X86Seg {
 #define CR4_PKE_MASK   (1U << 22)
 #define CR4_PKS_MASK   (1U << 24)
 
+#define XFEATURE_X87        (1UL << 0)
+#define XFEATURE_SSE        (1UL << 1)
+#define XFEATURE_AVX        (1UL << 2)
+#define XFEATURE_AVX512_OPMASK          (1UL << 5)
+#define XFEATURE_AVX512_ZMM_Hi256       (1UL << 6)
+#define XFEATURE_AVX512_Hi16_ZMM        (1UL << 7)
+#define XFEATURE_AVX512     (XFEATURE_AVX512_OPMASK | \
+                             XFEATURE_AVX512_ZMM_Hi256 | \
+                             XFEATURE_AVX512_Hi16_ZMM)
+
+
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
 #define DR6_BT          (1 << 15)