diff mbox series

[1/6] target/i386: Add AVX512 state when AVX10 is supported

Message ID 20241028024512.156724-2-tao1.su@linux.intel.com (mailing list archive)
State New
Headers show
Series Add AVX10.1 CPUID support and GraniteRapids-v2 model | expand

Commit Message

Tao Su Oct. 28, 2024, 2:45 a.m. UTC
AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
are identical to AVX512 state regardless of the supported vector lengths.

Given that some E-cores will support AVX10 but not support AVX512, add
AVX512 state components to guest when AVX10 is enabled.

Tested-by: Xuelian Guo <xuelian.guo@intel.com>
Signed-off-by: Tao Su <tao1.su@linux.intel.com>
---
 target/i386/cpu.c | 14 ++++++++++++++
 target/i386/cpu.h |  2 ++
 2 files changed, 16 insertions(+)

Comments

Paolo Bonzini Oct. 28, 2024, 8:41 a.m. UTC | #1
On 10/28/24 03:45, Tao Su wrote:
> AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
> are identical to AVX512 state regardless of the supported vector lengths.
> 
> Given that some E-cores will support AVX10 but not support AVX512, add
> AVX512 state components to guest when AVX10 is enabled.
> 
> Tested-by: Xuelian Guo <xuelian.guo@intel.com>
> Signed-off-by: Tao Su <tao1.su@linux.intel.com>
> ---
>   target/i386/cpu.c | 14 ++++++++++++++
>   target/i386/cpu.h |  2 ++
>   2 files changed, 16 insertions(+)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 1ff1af032e..d845ff5e4e 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -7177,6 +7177,13 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
>           }
>           if (env->features[esa->feature] & esa->bits) {
>               xcr0 |= 1ull << i;
> +            continue;
> +        }
> +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> +            i == XSTATE_Hi16_ZMM_BIT) {

Can you confirm that XSTATE_ZMM_Hi256_BIT depends on AVX10 and not 
AVX10-512?

Thanks,

Paolo

> +            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
> +                xcr0 |= 1ull << i;
> +            }
>           }
>       }
>   
> @@ -7315,6 +7322,13 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>           const ExtSaveArea *esa = &x86_ext_save_areas[i];
>           if (env->features[esa->feature] & esa->bits) {
>               mask |= (1ULL << i);
> +            continue;
> +        }
> +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> +            i == XSTATE_Hi16_ZMM_BIT) {
> +            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
> +                mask |= (1ULL << i);
> +            }
>           }
>       }
>   
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 74886d1580..280bec701c 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -972,6 +972,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
>   #define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
>   /* PREFETCHIT0/1 Instructions */
>   #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
> +/* Support for Advanced Vector Extensions 10 */
> +#define CPUID_7_1_EDX_AVX10             (1U << 19)
>   /* Flexible return and event delivery (FRED) */
>   #define CPUID_7_1_EAX_FRED              (1U << 17)
>   /* Load into IA32_KERNEL_GS_BASE (LKGS) */
Tao Su Oct. 28, 2024, 9:25 a.m. UTC | #2
On Mon, Oct 28, 2024 at 09:41:14AM +0100, Paolo Bonzini wrote:
> On 10/28/24 03:45, Tao Su wrote:
> > AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
> > are identical to AVX512 state regardless of the supported vector lengths.
> > 
> > Given that some E-cores will support AVX10 but not support AVX512, add
> > AVX512 state components to guest when AVX10 is enabled.
> > 
> > Tested-by: Xuelian Guo <xuelian.guo@intel.com>
> > Signed-off-by: Tao Su <tao1.su@linux.intel.com>
> > ---
> >   target/i386/cpu.c | 14 ++++++++++++++
> >   target/i386/cpu.h |  2 ++
> >   2 files changed, 16 insertions(+)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 1ff1af032e..d845ff5e4e 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -7177,6 +7177,13 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
> >           }
> >           if (env->features[esa->feature] & esa->bits) {
> >               xcr0 |= 1ull << i;
> > +            continue;
> > +        }
> > +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> > +            i == XSTATE_Hi16_ZMM_BIT) {
> 
> Can you confirm that XSTATE_ZMM_Hi256_BIT depends on AVX10 and not
> AVX10-512?
> 

Sorry, I should attach AVX10.2 spec [*].

In 3.1.3, spec said Intel AVX10 state enumeration in CPUID leaf 0xD and
enabling in XCR0 register are identical to Intel AVX-512 regardless of the
maximum vector length supported.

So XSTATE_ZMM_Hi256_BIT doesn't depend on AVX10-512.

[*] https://cdrdv2.intel.com/v1/dl/getContent/828965
Xiaoyao Li Oct. 28, 2024, 3:12 p.m. UTC | #3
On 10/28/2024 10:45 AM, Tao Su wrote:
> AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
> are identical to AVX512 state regardless of the supported vector lengths.
> 
> Given that some E-cores will support AVX10 but not support AVX512, add
> AVX512 state components to guest when AVX10 is enabled.
> 
> Tested-by: Xuelian Guo <xuelian.guo@intel.com>
> Signed-off-by: Tao Su <tao1.su@linux.intel.com>

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>

> ---
>   target/i386/cpu.c | 14 ++++++++++++++
>   target/i386/cpu.h |  2 ++
>   2 files changed, 16 insertions(+)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 1ff1af032e..d845ff5e4e 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -7177,6 +7177,13 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
>           }
>           if (env->features[esa->feature] & esa->bits) {
>               xcr0 |= 1ull << i;
> +            continue;
> +        }
> +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> +            i == XSTATE_Hi16_ZMM_BIT) {
> +            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
> +                xcr0 |= 1ull << i;
> +            }
>           }
>       }
>   
> @@ -7315,6 +7322,13 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
>           const ExtSaveArea *esa = &x86_ext_save_areas[i];
>           if (env->features[esa->feature] & esa->bits) {
>               mask |= (1ULL << i);
> +            continue;
> +        }
> +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> +            i == XSTATE_Hi16_ZMM_BIT) {
> +            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
> +                mask |= (1ULL << i);
> +            }
>           }
>       }
>   
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 74886d1580..280bec701c 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -972,6 +972,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
>   #define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
>   /* PREFETCHIT0/1 Instructions */
>   #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
> +/* Support for Advanced Vector Extensions 10 */
> +#define CPUID_7_1_EDX_AVX10             (1U << 19)
>   /* Flexible return and event delivery (FRED) */
>   #define CPUID_7_1_EAX_FRED              (1U << 17)
>   /* Load into IA32_KERNEL_GS_BASE (LKGS) */
Paolo Bonzini Oct. 29, 2024, 8:49 a.m. UTC | #4
On 10/28/24 10:25, Tao Su wrote:
> On Mon, Oct 28, 2024 at 09:41:14AM +0100, Paolo Bonzini wrote:
>> On 10/28/24 03:45, Tao Su wrote:
>>> AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
>>> are identical to AVX512 state regardless of the supported vector lengths.
>>>
>>> Given that some E-cores will support AVX10 but not support AVX512, add
>>> AVX512 state components to guest when AVX10 is enabled.
>>>
>>> Tested-by: Xuelian Guo <xuelian.guo@intel.com>
>>> Signed-off-by: Tao Su <tao1.su@linux.intel.com>
>>> ---
>>>    target/i386/cpu.c | 14 ++++++++++++++
>>>    target/i386/cpu.h |  2 ++
>>>    2 files changed, 16 insertions(+)
>>>
>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>>> index 1ff1af032e..d845ff5e4e 100644
>>> --- a/target/i386/cpu.c
>>> +++ b/target/i386/cpu.c
>>> @@ -7177,6 +7177,13 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
>>>            }
>>>            if (env->features[esa->feature] & esa->bits) {
>>>                xcr0 |= 1ull << i;
>>> +            continue;
>>> +        }
>>> +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
>>> +            i == XSTATE_Hi16_ZMM_BIT) {
>>
>> Can you confirm that XSTATE_ZMM_Hi256_BIT depends on AVX10 and not
>> AVX10-512?
>>
> 
> Sorry, I should attach AVX10.2 spec [*].
> 
> In 3.1.3, spec said Intel AVX10 state enumeration in CPUID leaf 0xD and
> enabling in XCR0 register are identical to Intel AVX-512 regardless of the
> maximum vector length supported.
> 
> So XSTATE_ZMM_Hi256_BIT doesn't depend on AVX10-512.
> 
> [*] https://cdrdv2.intel.com/v1/dl/getContent/828965

Ok, thanks.

Another related issue is that kvm_cpu_xsave_init() is using esa->feature 
and esa->bits, which misses these three features.

I think we need to change the code to not look at esa->feature at all. 
I'll send a v2 of your series.

Paolo
Tao Su Oct. 29, 2024, 9:29 a.m. UTC | #5
On Tue, Oct 29, 2024 at 09:49:39AM +0100, Paolo Bonzini wrote:
> On 10/28/24 10:25, Tao Su wrote:
> > On Mon, Oct 28, 2024 at 09:41:14AM +0100, Paolo Bonzini wrote:
> > > On 10/28/24 03:45, Tao Su wrote:
> > > > AVX10 state enumeration in CPUID leaf D and enabling in XCR0 register
> > > > are identical to AVX512 state regardless of the supported vector lengths.
> > > > 
> > > > Given that some E-cores will support AVX10 but not support AVX512, add
> > > > AVX512 state components to guest when AVX10 is enabled.
> > > > 
> > > > Tested-by: Xuelian Guo <xuelian.guo@intel.com>
> > > > Signed-off-by: Tao Su <tao1.su@linux.intel.com>
> > > > ---
> > > >    target/i386/cpu.c | 14 ++++++++++++++
> > > >    target/i386/cpu.h |  2 ++
> > > >    2 files changed, 16 insertions(+)
> > > > 
> > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > index 1ff1af032e..d845ff5e4e 100644
> > > > --- a/target/i386/cpu.c
> > > > +++ b/target/i386/cpu.c
> > > > @@ -7177,6 +7177,13 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type)
> > > >            }
> > > >            if (env->features[esa->feature] & esa->bits) {
> > > >                xcr0 |= 1ull << i;
> > > > +            continue;
> > > > +        }
> > > > +        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
> > > > +            i == XSTATE_Hi16_ZMM_BIT) {
> > > 
> > > Can you confirm that XSTATE_ZMM_Hi256_BIT depends on AVX10 and not
> > > AVX10-512?
> > > 
> > 
> > Sorry, I should attach AVX10.2 spec [*].
> > 
> > In 3.1.3, spec said Intel AVX10 state enumeration in CPUID leaf 0xD and
> > enabling in XCR0 register are identical to Intel AVX-512 regardless of the
> > maximum vector length supported.
> > 
> > So XSTATE_ZMM_Hi256_BIT doesn't depend on AVX10-512.
> > 
> > [*] https://cdrdv2.intel.com/v1/dl/getContent/828965
> 
> Ok, thanks.
> 
> Another related issue is that kvm_cpu_xsave_init() is using esa->feature and
> esa->bits, which misses these three features.

Yes, it has issue if AVX512F is not reported but AVX10 is reported, thanks for
pointing out!

> 
> I think we need to change the code to not look at esa->feature at all. I'll
> send a v2 of your series.
> 

Yes, ExtSaveArea can't set more feature bits, which makes the code a bit ugly.
Looking forward to the better implementation :-)
diff mbox series

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1ff1af032e..d845ff5e4e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7177,6 +7177,13 @@  static void x86_cpu_reset_hold(Object *obj, ResetType type)
         }
         if (env->features[esa->feature] & esa->bits) {
             xcr0 |= 1ull << i;
+            continue;
+        }
+        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
+            i == XSTATE_Hi16_ZMM_BIT) {
+            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
+                xcr0 |= 1ull << i;
+            }
         }
     }
 
@@ -7315,6 +7322,13 @@  static void x86_cpu_enable_xsave_components(X86CPU *cpu)
         const ExtSaveArea *esa = &x86_ext_save_areas[i];
         if (env->features[esa->feature] & esa->bits) {
             mask |= (1ULL << i);
+            continue;
+        }
+        if (i == XSTATE_OPMASK_BIT || i == XSTATE_ZMM_Hi256_BIT ||
+            i == XSTATE_Hi16_ZMM_BIT) {
+            if (env->features[FEAT_7_1_EDX] & CPUID_7_1_EDX_AVX10) {
+                mask |= (1ULL << i);
+            }
         }
     }
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 74886d1580..280bec701c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -972,6 +972,8 @@  uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
 #define CPUID_7_1_EDX_AMX_COMPLEX       (1U << 8)
 /* PREFETCHIT0/1 Instructions */
 #define CPUID_7_1_EDX_PREFETCHITI       (1U << 14)
+/* Support for Advanced Vector Extensions 10 */
+#define CPUID_7_1_EDX_AVX10             (1U << 19)
 /* Flexible return and event delivery (FRED) */
 #define CPUID_7_1_EAX_FRED              (1U << 17)
 /* Load into IA32_KERNEL_GS_BASE (LKGS) */