diff mbox series

[v7,08/16] i386: Expose module level in CPUID[0x1F]

Message ID 20240108082727.420817-9-zhao1.liu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Support smp.clusters for x86 in QEMU | expand

Commit Message

Zhao Liu Jan. 8, 2024, 8:27 a.m. UTC
From: Zhao Liu <zhao1.liu@intel.com>

Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
erroneous smp_num_siblings on Intel Hybrid platforms") is able to
handle platforms with Module level enumerated via CPUID.1F.

Expose the module level in CPUID[0x1F] if the machine has more than 1
modules.

(Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
configurations in "-smp".)

Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Tested-by: Yongwei Ma <yongwei.ma@intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
Changes since v3:
 * New patch to expose module level in 0x1F.
 * Add Tested-by tag from Yongwei.
---
 target/i386/cpu.c     | 12 +++++++++++-
 target/i386/cpu.h     |  2 ++
 target/i386/kvm/kvm.c |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

Comments

Xiaoyao Li Jan. 11, 2024, 6:04 a.m. UTC | #1
On 1/8/2024 4:27 PM, Zhao Liu wrote:
> From: Zhao Liu <zhao1.liu@intel.com>
> 
> Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> erroneous smp_num_siblings on Intel Hybrid platforms") is able to
> handle platforms with Module level enumerated via CPUID.1F.
> 
> Expose the module level in CPUID[0x1F] if the machine has more than 1
> modules.
> 
> (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> configurations in "-smp".)
> 
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> Tested-by: Babu Moger <babu.moger@amd.com>
> Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> Changes since v3:
>   * New patch to expose module level in 0x1F.
>   * Add Tested-by tag from Yongwei.
> ---
>   target/i386/cpu.c     | 12 +++++++++++-
>   target/i386/cpu.h     |  2 ++
>   target/i386/kvm/kvm.c |  2 +-
>   3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 294ca6b8947a..a2d39d2198b6 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
>           return 1;
>       case CPU_TOPO_LEVEL_CORE:
>           return topo_info->threads_per_core;
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return topo_info->threads_per_core * topo_info->cores_per_module;
>       case CPU_TOPO_LEVEL_DIE:
>           return topo_info->threads_per_core * topo_info->cores_per_module *
>                  topo_info->modules_per_die;
> @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
>           return 0;
>       case CPU_TOPO_LEVEL_CORE:
>           return apicid_core_offset(topo_info);
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return apicid_module_offset(topo_info);
>       case CPU_TOPO_LEVEL_DIE:
>           return apicid_die_offset(topo_info);
>       case CPU_TOPO_LEVEL_PACKAGE:
> @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
>           return CPUID_1F_ECX_TOPO_LEVEL_SMT;
>       case CPU_TOPO_LEVEL_CORE:
>           return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
>       case CPU_TOPO_LEVEL_DIE:
>           return CPUID_1F_ECX_TOPO_LEVEL_DIE;
>       default:
> @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
>           if (env->nr_dies > 1) {
>               set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
>           }
> +
> +        if (env->nr_modules > 1) {
> +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> +        }
>       }
>   
>       *ecx = count & 0xff;
> @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>           break;
>       case 0x1F:
>           /* V2 Extended Topology Enumeration Leaf */
> -        if (topo_info.dies_per_pkg < 2) {
> +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {

maybe we can come up with below function if we have 
env->valid_cpu_topo[] as I suggested in patch 5.

bool cpu_x86_has_valid_cpuid1f(CPUX86State *env) {
	return env->valid_cpu_topo[2] ? true : false;
}

...

>               *eax = *ebx = *ecx = *edx = 0;
>               break;
>           }
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index eecd30bde92b..97b290e10576 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
>       CPU_TOPO_LEVEL_INVALID,
>       CPU_TOPO_LEVEL_SMT,
>       CPU_TOPO_LEVEL_CORE,
> +    CPU_TOPO_LEVEL_MODULE,
>       CPU_TOPO_LEVEL_DIE,
>       CPU_TOPO_LEVEL_PACKAGE,
>       CPU_TOPO_LEVEL_MAX,
> @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
>   #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
>   #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
>   #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
> +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
>   #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
>   
>   /* MSR Feature Bits */
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 4ce80555b45c..e5ddb214cb36 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>               break;
>           }
>           case 0x1f:
> -            if (env->nr_dies < 2) {
> +            if (env->nr_modules < 2 && env->nr_dies < 2) {

then cpu_x86_has_valid_cpuid1f() can be used here.

>                   break;
>               }
>               /* fallthrough */
Zhao Liu Jan. 11, 2024, 9:21 a.m. UTC | #2
Hi Xiaoyao,

On Thu, Jan 11, 2024 at 02:04:53PM +0800, Xiaoyao Li wrote:
> Date: Thu, 11 Jan 2024 14:04:53 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On 1/8/2024 4:27 PM, Zhao Liu wrote:
> > From: Zhao Liu <zhao1.liu@intel.com>
> > 
> > Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> > erroneous smp_num_siblings on Intel Hybrid platforms") is able to
> > handle platforms with Module level enumerated via CPUID.1F.
> > 
> > Expose the module level in CPUID[0x1F] if the machine has more than 1
> > modules.
> > 
> > (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> > configurations in "-smp".)
> > 
> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > Tested-by: Babu Moger <babu.moger@amd.com>
> > Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> > Acked-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > Changes since v3:
> >   * New patch to expose module level in 0x1F.
> >   * Add Tested-by tag from Yongwei.
> > ---
> >   target/i386/cpu.c     | 12 +++++++++++-
> >   target/i386/cpu.h     |  2 ++
> >   target/i386/kvm/kvm.c |  2 +-
> >   3 files changed, 14 insertions(+), 2 deletions(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 294ca6b8947a..a2d39d2198b6 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
> >           return 1;
> >       case CPU_TOPO_LEVEL_CORE:
> >           return topo_info->threads_per_core;
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return topo_info->threads_per_core * topo_info->cores_per_module;
> >       case CPU_TOPO_LEVEL_DIE:
> >           return topo_info->threads_per_core * topo_info->cores_per_module *
> >                  topo_info->modules_per_die;
> > @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> >           return 0;
> >       case CPU_TOPO_LEVEL_CORE:
> >           return apicid_core_offset(topo_info);
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return apicid_module_offset(topo_info);
> >       case CPU_TOPO_LEVEL_DIE:
> >           return apicid_die_offset(topo_info);
> >       case CPU_TOPO_LEVEL_PACKAGE:
> > @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
> >           return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> >       case CPU_TOPO_LEVEL_CORE:
> >           return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
> >       case CPU_TOPO_LEVEL_DIE:
> >           return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> >       default:
> > @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
> >           if (env->nr_dies > 1) {
> >               set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
> >           }
> > +
> > +        if (env->nr_modules > 1) {
> > +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> > +        }
> >       }
> >       *ecx = count & 0xff;
> > @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >           break;
> >       case 0x1F:
> >           /* V2 Extended Topology Enumeration Leaf */
> > -        if (topo_info.dies_per_pkg < 2) {
> > +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
> 
> maybe we can come up with below function if we have env->valid_cpu_topo[] as
> I suggested in patch 5.
> 
> bool cpu_x86_has_valid_cpuid1f(CPUX86State *env) {
> 	return env->valid_cpu_topo[2] ? true : false;
> }
> 
> ...

This makes sense.

> 
> >               *eax = *ebx = *ecx = *edx = 0;
> >               break;
> >           }
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index eecd30bde92b..97b290e10576 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
> >       CPU_TOPO_LEVEL_INVALID,
> >       CPU_TOPO_LEVEL_SMT,
> >       CPU_TOPO_LEVEL_CORE,
> > +    CPU_TOPO_LEVEL_MODULE,
> >       CPU_TOPO_LEVEL_DIE,
> >       CPU_TOPO_LEVEL_PACKAGE,
> >       CPU_TOPO_LEVEL_MAX,
> > @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
> >   #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
> >   #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
> >   #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
> > +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
> >   #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
> >   /* MSR Feature Bits */
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index 4ce80555b45c..e5ddb214cb36 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >               break;
> >           }
> >           case 0x1f:
> > -            if (env->nr_dies < 2) {
> > +            if (env->nr_modules < 2 && env->nr_dies < 2) {
> 
> then cpu_x86_has_valid_cpuid1f() can be used here.
>

Good idae, I will also try this.

Thanks,
Zhao
Yuan Yao Jan. 15, 2024, 3:25 a.m. UTC | #3
On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote:
> From: Zhao Liu <zhao1.liu@intel.com>
>
> Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> erroneous smp_num_siblings on Intel Hybrid platforms") is able to
> handle platforms with Module level enumerated via CPUID.1F.
>
> Expose the module level in CPUID[0x1F] if the machine has more than 1
> modules.
>
> (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> configurations in "-smp".)
>
> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> Tested-by: Babu Moger <babu.moger@amd.com>
> Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> Acked-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> Changes since v3:
>  * New patch to expose module level in 0x1F.
>  * Add Tested-by tag from Yongwei.
> ---
>  target/i386/cpu.c     | 12 +++++++++++-
>  target/i386/cpu.h     |  2 ++
>  target/i386/kvm/kvm.c |  2 +-
>  3 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 294ca6b8947a..a2d39d2198b6 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
>          return 1;
>      case CPU_TOPO_LEVEL_CORE:
>          return topo_info->threads_per_core;
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return topo_info->threads_per_core * topo_info->cores_per_module;
>      case CPU_TOPO_LEVEL_DIE:
>          return topo_info->threads_per_core * topo_info->cores_per_module *
>                 topo_info->modules_per_die;
> @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
>          return 0;
>      case CPU_TOPO_LEVEL_CORE:
>          return apicid_core_offset(topo_info);
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return apicid_module_offset(topo_info);
>      case CPU_TOPO_LEVEL_DIE:
>          return apicid_die_offset(topo_info);
>      case CPU_TOPO_LEVEL_PACKAGE:
> @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
>          return CPUID_1F_ECX_TOPO_LEVEL_SMT;
>      case CPU_TOPO_LEVEL_CORE:
>          return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> +    case CPU_TOPO_LEVEL_MODULE:
> +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
>      case CPU_TOPO_LEVEL_DIE:
>          return CPUID_1F_ECX_TOPO_LEVEL_DIE;
>      default:
> @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
>          if (env->nr_dies > 1) {
>              set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
>          }
> +
> +        if (env->nr_modules > 1) {
> +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> +        }
>      }
>
>      *ecx = count & 0xff;
> @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>          break;
>      case 0x1F:
>          /* V2 Extended Topology Enumeration Leaf */
> -        if (topo_info.dies_per_pkg < 2) {
> +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {

A question:
Is the original checking necessary ?
The 0x1f exists even on cpu w/o modules/dies topology on bare metal, I tried
on EMR:

// leaf 0
0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69

// leaf 0x1f
0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004

// leaf 0xb
0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004

So here leads to different cpu behavior from bare metal, even in case
of "-cpu host".

In SDM Vol2, cpudid instruction section:

" CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel
recommends using leaf 1FH when available rather than leaf
0BH and ensuring that any leaf 0BH algorithms are updated to
support leaf 1FH. "

My understanding: if 0x1f is existed (leaf 0.eax >= 0x1f)
then it should have same values in lp/core level as 0xb.

>              *eax = *ebx = *ecx = *edx = 0;
>              break;
>          }
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index eecd30bde92b..97b290e10576 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
>      CPU_TOPO_LEVEL_INVALID,
>      CPU_TOPO_LEVEL_SMT,
>      CPU_TOPO_LEVEL_CORE,
> +    CPU_TOPO_LEVEL_MODULE,
>      CPU_TOPO_LEVEL_DIE,
>      CPU_TOPO_LEVEL_PACKAGE,
>      CPU_TOPO_LEVEL_MAX,
> @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
>  #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
>  #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
>  #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
> +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
>  #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
>
>  /* MSR Feature Bits */
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 4ce80555b45c..e5ddb214cb36 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>              break;
>          }
>          case 0x1f:
> -            if (env->nr_dies < 2) {
> +            if (env->nr_modules < 2 && env->nr_dies < 2) {
>                  break;
>              }
>              /* fallthrough */
> --
> 2.34.1
>
>
Zhao Liu Jan. 15, 2024, 4:09 a.m. UTC | #4
Hi Yuan,

On Mon, Jan 15, 2024 at 11:25:24AM +0800, Yuan Yao wrote:
> Date: Mon, 15 Jan 2024 11:25:24 +0800
> From: Yuan Yao <yuan.yao@linux.intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote:
> > From: Zhao Liu <zhao1.liu@intel.com>
> >
> > Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> > erroneous smp_num_siblings on Intel Hybrid platforms") is able to
> > handle platforms with Module level enumerated via CPUID.1F.
> >
> > Expose the module level in CPUID[0x1F] if the machine has more than 1
> > modules.
> >
> > (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> > configurations in "-smp".)
> >
> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > Tested-by: Babu Moger <babu.moger@amd.com>
> > Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> > Acked-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > Changes since v3:
> >  * New patch to expose module level in 0x1F.
> >  * Add Tested-by tag from Yongwei.
> > ---
> >  target/i386/cpu.c     | 12 +++++++++++-
> >  target/i386/cpu.h     |  2 ++
> >  target/i386/kvm/kvm.c |  2 +-
> >  3 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 294ca6b8947a..a2d39d2198b6 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
> >          return 1;
> >      case CPU_TOPO_LEVEL_CORE:
> >          return topo_info->threads_per_core;
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return topo_info->threads_per_core * topo_info->cores_per_module;
> >      case CPU_TOPO_LEVEL_DIE:
> >          return topo_info->threads_per_core * topo_info->cores_per_module *
> >                 topo_info->modules_per_die;
> > @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> >          return 0;
> >      case CPU_TOPO_LEVEL_CORE:
> >          return apicid_core_offset(topo_info);
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return apicid_module_offset(topo_info);
> >      case CPU_TOPO_LEVEL_DIE:
> >          return apicid_die_offset(topo_info);
> >      case CPU_TOPO_LEVEL_PACKAGE:
> > @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
> >          return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> >      case CPU_TOPO_LEVEL_CORE:
> >          return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> > +    case CPU_TOPO_LEVEL_MODULE:
> > +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
> >      case CPU_TOPO_LEVEL_DIE:
> >          return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> >      default:
> > @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
> >          if (env->nr_dies > 1) {
> >              set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
> >          }
> > +
> > +        if (env->nr_modules > 1) {
> > +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> > +        }
> >      }
> >
> >      *ecx = count & 0xff;
> > @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> >          break;
> >      case 0x1F:
> >          /* V2 Extended Topology Enumeration Leaf */
> > -        if (topo_info.dies_per_pkg < 2) {
> > +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
> 
> A question:
> Is the original checking necessary ?
> The 0x1f exists even on cpu w/o modules/dies topology on bare metal, I tried
> on EMR:
> 
> // leaf 0
> 0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
> 
> // leaf 0x1f
> 0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
> 0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
> 0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
> 
> // leaf 0xb
> 0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
> 0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
> 0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004

The 0x1f is introduced for CascadeLake-AP with die level. And yes the
newer mahcines all have this leaf.

> 
> So here leads to different cpu behavior from bare metal, even in case
> of "-cpu host".
> 
> In SDM Vol2, cpudid instruction section:
> 
> " CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel
> recommends using leaf 1FH when available rather than leaf
> 0BH and ensuring that any leaf 0BH algorithms are updated to
> support leaf 1FH. "
> 
> My understanding: if 0x1f is existed (leaf 0.eax >= 0x1f)
> then it should have same values in lp/core level as 0xb.

Yes, I think it's time to move to default 0x1f.

The compatibility issue can be solved by a cpuid-0x1f option similar to
cpuid-0xb. I'll cook a patch after this patch series.

Thanks,
Zhao

> 
> >              *eax = *ebx = *ecx = *edx = 0;
> >              break;
> >          }
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index eecd30bde92b..97b290e10576 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
> >      CPU_TOPO_LEVEL_INVALID,
> >      CPU_TOPO_LEVEL_SMT,
> >      CPU_TOPO_LEVEL_CORE,
> > +    CPU_TOPO_LEVEL_MODULE,
> >      CPU_TOPO_LEVEL_DIE,
> >      CPU_TOPO_LEVEL_PACKAGE,
> >      CPU_TOPO_LEVEL_MAX,
> > @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
> >  #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
> >  #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
> >  #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
> > +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
> >  #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
> >
> >  /* MSR Feature Bits */
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index 4ce80555b45c..e5ddb214cb36 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >              break;
> >          }
> >          case 0x1f:
> > -            if (env->nr_dies < 2) {
> > +            if (env->nr_modules < 2 && env->nr_dies < 2) {
> >                  break;
> >              }
> >              /* fallthrough */
> > --
> > 2.34.1
> >
> >
Xiaoyao Li Jan. 15, 2024, 4:34 a.m. UTC | #5
On 1/15/2024 12:09 PM, Zhao Liu wrote:
> Hi Yuan,
> 
> On Mon, Jan 15, 2024 at 11:25:24AM +0800, Yuan Yao wrote:
>> Date: Mon, 15 Jan 2024 11:25:24 +0800
>> From: Yuan Yao <yuan.yao@linux.intel.com>
>> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
>>
>> On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote:
>>> From: Zhao Liu <zhao1.liu@intel.com>
>>>
>>> Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
>>> erroneous smp_num_siblings on Intel Hybrid platforms") is able to
>>> handle platforms with Module level enumerated via CPUID.1F.
>>>
>>> Expose the module level in CPUID[0x1F] if the machine has more than 1
>>> modules.
>>>
>>> (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
>>> configurations in "-smp".)
>>>
>>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
>>> Tested-by: Babu Moger <babu.moger@amd.com>
>>> Tested-by: Yongwei Ma <yongwei.ma@intel.com>
>>> Acked-by: Michael S. Tsirkin <mst@redhat.com>
>>> ---
>>> Changes since v3:
>>>   * New patch to expose module level in 0x1F.
>>>   * Add Tested-by tag from Yongwei.
>>> ---
>>>   target/i386/cpu.c     | 12 +++++++++++-
>>>   target/i386/cpu.h     |  2 ++
>>>   target/i386/kvm/kvm.c |  2 +-
>>>   3 files changed, 14 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
>>> index 294ca6b8947a..a2d39d2198b6 100644
>>> --- a/target/i386/cpu.c
>>> +++ b/target/i386/cpu.c
>>> @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
>>>           return 1;
>>>       case CPU_TOPO_LEVEL_CORE:
>>>           return topo_info->threads_per_core;
>>> +    case CPU_TOPO_LEVEL_MODULE:
>>> +        return topo_info->threads_per_core * topo_info->cores_per_module;
>>>       case CPU_TOPO_LEVEL_DIE:
>>>           return topo_info->threads_per_core * topo_info->cores_per_module *
>>>                  topo_info->modules_per_die;
>>> @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
>>>           return 0;
>>>       case CPU_TOPO_LEVEL_CORE:
>>>           return apicid_core_offset(topo_info);
>>> +    case CPU_TOPO_LEVEL_MODULE:
>>> +        return apicid_module_offset(topo_info);
>>>       case CPU_TOPO_LEVEL_DIE:
>>>           return apicid_die_offset(topo_info);
>>>       case CPU_TOPO_LEVEL_PACKAGE:
>>> @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
>>>           return CPUID_1F_ECX_TOPO_LEVEL_SMT;
>>>       case CPU_TOPO_LEVEL_CORE:
>>>           return CPUID_1F_ECX_TOPO_LEVEL_CORE;
>>> +    case CPU_TOPO_LEVEL_MODULE:
>>> +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
>>>       case CPU_TOPO_LEVEL_DIE:
>>>           return CPUID_1F_ECX_TOPO_LEVEL_DIE;
>>>       default:
>>> @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
>>>           if (env->nr_dies > 1) {
>>>               set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
>>>           }
>>> +
>>> +        if (env->nr_modules > 1) {
>>> +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
>>> +        }
>>>       }
>>>
>>>       *ecx = count & 0xff;
>>> @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>>>           break;
>>>       case 0x1F:
>>>           /* V2 Extended Topology Enumeration Leaf */
>>> -        if (topo_info.dies_per_pkg < 2) {
>>> +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
>>
>> A question:
>> Is the original checking necessary ?
>> The 0x1f exists even on cpu w/o modules/dies topology on bare metal, I tried
>> on EMR:
>>
>> // leaf 0
>> 0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
>>
>> // leaf 0x1f
>> 0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
>> 0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
>> 0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
>>
>> // leaf 0xb
>> 0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
>> 0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
>> 0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
> 
> The 0x1f is introduced for CascadeLake-AP with die level. And yes the
> newer mahcines all have this leaf.
> 
>>
>> So here leads to different cpu behavior from bare metal, even in case
>> of "-cpu host".
>>
>> In SDM Vol2, cpudid instruction section:
>>
>> " CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel
>> recommends using leaf 1FH when available rather than leaf
>> 0BH and ensuring that any leaf 0BH algorithms are updated to
>> support leaf 1FH. "
>>
>> My understanding: if 0x1f is existed (leaf 0.eax >= 0x1f)
>> then it should have same values in lp/core level as 0xb.

No. leaf 0x1f reports the same values in lp/core leve as leaf 0xb only 
when the machine supports these two levels. If the machine supports more 
levels, they will be different.

e.g., the data on one Alder lake:

0x0000000b 0x00: eax=0x00000001 ebx=0x00000001 ecx=0x00000100 edx=0x00000006
    0x0000000b 0x01: eax=0x00000007 ebx=0x00000004 ecx=0x00000201 
edx=0x00000006
    0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 
edx=0x00000006

0x0000001f 0x00: eax=0x00000001 ebx=0x00000001 ecx=0x00000100 edx=0x00000006
    0x0000001f 0x01: eax=0x00000003 ebx=0x00000004 ecx=0x00000201 
edx=0x00000006
    0x0000001f 0x02: eax=0x00000007 ebx=0x00000004 ecx=0x00000302 
edx=0x00000006
    0x0000001f 0x03: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 
edx=0x00000006


> Yes, I think it's time to move to default 0x1f.

we don't need to do so until it's necessary.

> The compatibility issue can be solved by a cpuid-0x1f option similar to
> cpuid-0xb. I'll cook a patch after this patch series.
> 
> Thanks,
> Zhao
> 
>>
>>>               *eax = *ebx = *ecx = *edx = 0;
>>>               break;
>>>           }
>>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>>> index eecd30bde92b..97b290e10576 100644
>>> --- a/target/i386/cpu.h
>>> +++ b/target/i386/cpu.h
>>> @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
>>>       CPU_TOPO_LEVEL_INVALID,
>>>       CPU_TOPO_LEVEL_SMT,
>>>       CPU_TOPO_LEVEL_CORE,
>>> +    CPU_TOPO_LEVEL_MODULE,
>>>       CPU_TOPO_LEVEL_DIE,
>>>       CPU_TOPO_LEVEL_PACKAGE,
>>>       CPU_TOPO_LEVEL_MAX,
>>> @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
>>>   #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
>>>   #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
>>>   #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
>>> +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
>>>   #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
>>>
>>>   /* MSR Feature Bits */
>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>> index 4ce80555b45c..e5ddb214cb36 100644
>>> --- a/target/i386/kvm/kvm.c
>>> +++ b/target/i386/kvm/kvm.c
>>> @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>>               break;
>>>           }
>>>           case 0x1f:
>>> -            if (env->nr_dies < 2) {
>>> +            if (env->nr_modules < 2 && env->nr_dies < 2) {
>>>                   break;
>>>               }
>>>               /* fallthrough */
>>> --
>>> 2.34.1
>>>
>>>
>
Yuan Yao Jan. 15, 2024, 5:20 a.m. UTC | #6
On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
> On 1/15/2024 12:09 PM, Zhao Liu wrote:
> > Hi Yuan,
> >
> > On Mon, Jan 15, 2024 at 11:25:24AM +0800, Yuan Yao wrote:
> > > Date: Mon, 15 Jan 2024 11:25:24 +0800
> > > From: Yuan Yao <yuan.yao@linux.intel.com>
> > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > >
> > > On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote:
> > > > From: Zhao Liu <zhao1.liu@intel.com>
> > > >
> > > > Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> > > > erroneous smp_num_siblings on Intel Hybrid platforms") is able to
> > > > handle platforms with Module level enumerated via CPUID.1F.
> > > >
> > > > Expose the module level in CPUID[0x1F] if the machine has more than 1
> > > > modules.
> > > >
> > > > (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> > > > configurations in "-smp".)
> > > >
> > > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > > > Tested-by: Babu Moger <babu.moger@amd.com>
> > > > Tested-by: Yongwei Ma <yongwei.ma@intel.com>
> > > > Acked-by: Michael S. Tsirkin <mst@redhat.com>
> > > > ---
> > > > Changes since v3:
> > > >   * New patch to expose module level in 0x1F.
> > > >   * Add Tested-by tag from Yongwei.
> > > > ---
> > > >   target/i386/cpu.c     | 12 +++++++++++-
> > > >   target/i386/cpu.h     |  2 ++
> > > >   target/i386/kvm/kvm.c |  2 +-
> > > >   3 files changed, 14 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > index 294ca6b8947a..a2d39d2198b6 100644
> > > > --- a/target/i386/cpu.c
> > > > +++ b/target/i386/cpu.c
> > > > @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
> > > >           return 1;
> > > >       case CPU_TOPO_LEVEL_CORE:
> > > >           return topo_info->threads_per_core;
> > > > +    case CPU_TOPO_LEVEL_MODULE:
> > > > +        return topo_info->threads_per_core * topo_info->cores_per_module;
> > > >       case CPU_TOPO_LEVEL_DIE:
> > > >           return topo_info->threads_per_core * topo_info->cores_per_module *
> > > >                  topo_info->modules_per_die;
> > > > @@ -297,6 +299,8 @@ static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> > > >           return 0;
> > > >       case CPU_TOPO_LEVEL_CORE:
> > > >           return apicid_core_offset(topo_info);
> > > > +    case CPU_TOPO_LEVEL_MODULE:
> > > > +        return apicid_module_offset(topo_info);
> > > >       case CPU_TOPO_LEVEL_DIE:
> > > >           return apicid_die_offset(topo_info);
> > > >       case CPU_TOPO_LEVEL_PACKAGE:
> > > > @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
> > > >           return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> > > >       case CPU_TOPO_LEVEL_CORE:
> > > >           return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> > > > +    case CPU_TOPO_LEVEL_MODULE:
> > > > +        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
> > > >       case CPU_TOPO_LEVEL_DIE:
> > > >           return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> > > >       default:
> > > > @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
> > > >           if (env->nr_dies > 1) {
> > > >               set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
> > > >           }
> > > > +
> > > > +        if (env->nr_modules > 1) {
> > > > +            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> > > > +        }
> > > >       }
> > > >
> > > >       *ecx = count & 0xff;
> > > > @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> > > >           break;
> > > >       case 0x1F:
> > > >           /* V2 Extended Topology Enumeration Leaf */
> > > > -        if (topo_info.dies_per_pkg < 2) {
> > > > +        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
> > >
> > > A question:
> > > Is the original checking necessary ?
> > > The 0x1f exists even on cpu w/o modules/dies topology on bare metal, I tried
> > > on EMR:
> > >
> > > // leaf 0
> > > 0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
> > >
> > > // leaf 0x1f
> > > 0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
> > > 0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
> > > 0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
> > >
> > > // leaf 0xb
> > > 0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
> > > 0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
> > > 0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
> >
> > The 0x1f is introduced for CascadeLake-AP with die level. And yes the
> > newer mahcines all have this leaf.
> >
> > >
> > > So here leads to different cpu behavior from bare metal, even in case
> > > of "-cpu host".
> > >
> > > In SDM Vol2, cpudid instruction section:
> > >
> > > " CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel
> > > recommends using leaf 1FH when available rather than leaf
> > > 0BH and ensuring that any leaf 0BH algorithms are updated to
> > > support leaf 1FH. "
> > >
> > > My understanding: if 0x1f is existed (leaf 0.eax >= 0x1f)
> > > then it should have same values in lp/core level as 0xb.
>
> No. leaf 0x1f reports the same values in lp/core leve as leaf 0xb only when
> the machine supports these two levels. If the machine supports more levels,
> they will be different.
>
> e.g., the data on one Alder lake:
>
> 0x0000000b 0x00: eax=0x00000001 ebx=0x00000001 ecx=0x00000100 edx=0x00000006
> 0x0000000b 0x01: eax=0x00000007 ebx=0x00000004 ecx=0x00000201 edx=0x00000006
> 0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000006
>
> 0x0000001f 0x00: eax=0x00000001 ebx=0x00000001 ecx=0x00000100 edx=0x00000006
> 0x0000001f 0x01: eax=0x00000003 ebx=0x00000004 ecx=0x00000201 edx=0x00000006
> 0x0000001f 0x02: eax=0x00000007 ebx=0x00000004 ecx=0x00000302 edx=0x00000006
> 0x0000001f 0x03: eax=0x00000000 ebx=0x00000000 ecx=0x00000003 edx=0x00000006

Ah, so my understanding is incorrect on this.

I tried on one raptor lake i5-i335U, which also hybrid soc but doesn't have
module level, in this case 0x1f and 0xb have same values in core/lp level.

>
>
> > Yes, I think it's time to move to default 0x1f.
>
> we don't need to do so until it's necessary.
>
> > The compatibility issue can be solved by a cpuid-0x1f option similar to
> > cpuid-0xb. I'll cook a patch after this patch series.
> >
> > Thanks,
> > Zhao
> >
> > >
> > > >               *eax = *ebx = *ecx = *edx = 0;
> > > >               break;
> > > >           }
> > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > > > index eecd30bde92b..97b290e10576 100644
> > > > --- a/target/i386/cpu.h
> > > > +++ b/target/i386/cpu.h
> > > > @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
> > > >       CPU_TOPO_LEVEL_INVALID,
> > > >       CPU_TOPO_LEVEL_SMT,
> > > >       CPU_TOPO_LEVEL_CORE,
> > > > +    CPU_TOPO_LEVEL_MODULE,
> > > >       CPU_TOPO_LEVEL_DIE,
> > > >       CPU_TOPO_LEVEL_PACKAGE,
> > > >       CPU_TOPO_LEVEL_MAX,
> > > > @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
> > > >   #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
> > > >   #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
> > > >   #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
> > > > +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
> > > >   #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
> > > >
> > > >   /* MSR Feature Bits */
> > > > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > > > index 4ce80555b45c..e5ddb214cb36 100644
> > > > --- a/target/i386/kvm/kvm.c
> > > > +++ b/target/i386/kvm/kvm.c
> > > > @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > >               break;
> > > >           }
> > > >           case 0x1f:
> > > > -            if (env->nr_dies < 2) {
> > > > +            if (env->nr_modules < 2 && env->nr_dies < 2) {
> > > >                   break;
> > > >               }
> > > >               /* fallthrough */
> > > > --
> > > > 2.34.1
> > > >
> > > >
> >
>
Xiaoyao Li Jan. 15, 2024, 6:11 a.m. UTC | #7
On 1/15/2024 2:12 PM, Zhao Liu wrote:
> Hi Xiaoyao,
> 
> On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
>> Date: Mon, 15 Jan 2024 12:34:12 +0800
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
>>
>>> Yes, I think it's time to move to default 0x1f.
>>
>> we don't need to do so until it's necessary.
> 
> Recent and future machines all support 0x1f, and at least SDM has
> emphasized the preferred use of 0x1f.

The preference is the guideline for software e.g., OS. QEMU doesn't need 
to emulate cpuid leaf 0x1f to guest if there is only smt and core level. 
because in this case, they are exactly the same in leaf 0xb and 0x1f. we 
don't need to bother advertising the duplicate data.

> Thanks,
> Zhao
>
Zhao Liu Jan. 15, 2024, 6:12 a.m. UTC | #8
Hi Xiaoyao,

On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
> Date: Mon, 15 Jan 2024 12:34:12 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> > Yes, I think it's time to move to default 0x1f.
> 
> we don't need to do so until it's necessary.

Recent and future machines all support 0x1f, and at least SDM has
emphasized the preferred use of 0x1f.

Thanks,
Zhao
Zhao Liu Jan. 15, 2024, 6:20 a.m. UTC | #9
On Mon, Jan 15, 2024 at 01:20:22PM +0800, Yuan Yao wrote:
> Date: Mon, 15 Jan 2024 13:20:22 +0800
> From: Yuan Yao <yuan.yao@linux.intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> Ah, so my understanding is incorrect on this.
> 
> I tried on one raptor lake i5-i335U, which also hybrid soc but doesn't have
> module level, in this case 0x1f and 0xb have same values in core/lp level.

Some socs have modules/dies but they don't expose them in 0x1f.

If the soc only expose thread/core levels in 0x1f, then its 0x1f is same
as 0x0b. Otherwise, it will have more subleaves and different
values.

Thanks,
Zhao
Zhao Liu Jan. 15, 2024, 6:35 a.m. UTC | #10
On Mon, Jan 15, 2024 at 02:11:17PM +0800, Xiaoyao Li wrote:
> Date: Mon, 15 Jan 2024 14:11:17 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On 1/15/2024 2:12 PM, Zhao Liu wrote:
> > Hi Xiaoyao,
> > 
> > On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
> > > Date: Mon, 15 Jan 2024 12:34:12 +0800
> > > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > > 
> > > > Yes, I think it's time to move to default 0x1f.
> > > 
> > > we don't need to do so until it's necessary.
> > 
> > Recent and future machines all support 0x1f, and at least SDM has
> > emphasized the preferred use of 0x1f.
> 
> The preference is the guideline for software e.g., OS. QEMU doesn't need to
> emulate cpuid leaf 0x1f to guest if there is only smt and core level.

Please, QEMU is emulating hardware not writing software. Is there any
reason why we shouldn't emulate new and generic hardware behaviors and
stick with the old ones?

> because in this case, they are exactly the same in leaf 0xb and 0x1f. we don't
> need to bother advertising the duplicate data.

You can't "define" the same 0x0b and 0x1f as duplicates. SDM doesn't
have such the definition.

Regards,
Zhao
Yuan Yao Jan. 15, 2024, 6:57 a.m. UTC | #11
On Mon, Jan 15, 2024 at 02:20:20PM +0800, Zhao Liu wrote:
> On Mon, Jan 15, 2024 at 01:20:22PM +0800, Yuan Yao wrote:
> > Date: Mon, 15 Jan 2024 13:20:22 +0800
> > From: Yuan Yao <yuan.yao@linux.intel.com>
> > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> >
> > Ah, so my understanding is incorrect on this.
> >
> > I tried on one raptor lake i5-i335U, which also hybrid soc but doesn't have
> > module level, in this case 0x1f and 0xb have same values in core/lp level.
>
> Some socs have modules/dies but they don't expose them in 0x1f.

Here they don't expose because from hardware level they can't or possible
software level configuration (i.e. disable some cores in bios) ?

>
> If the soc only expose thread/core levels in 0x1f, then its 0x1f is same
> as 0x0b. Otherwise, it will have more subleaves and different
> values.
>
> Thanks,
> Zhao
>
Xiaoyao Li Jan. 15, 2024, 7:16 a.m. UTC | #12
On 1/15/2024 2:35 PM, Zhao Liu wrote:
> On Mon, Jan 15, 2024 at 02:11:17PM +0800, Xiaoyao Li wrote:
>> Date: Mon, 15 Jan 2024 14:11:17 +0800
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
>>
>> On 1/15/2024 2:12 PM, Zhao Liu wrote:
>>> Hi Xiaoyao,
>>>
>>> On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
>>>> Date: Mon, 15 Jan 2024 12:34:12 +0800
>>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
>>>>
>>>>> Yes, I think it's time to move to default 0x1f.
>>>>
>>>> we don't need to do so until it's necessary.
>>>
>>> Recent and future machines all support 0x1f, and at least SDM has
>>> emphasized the preferred use of 0x1f.
>>
>> The preference is the guideline for software e.g., OS. QEMU doesn't need to
>> emulate cpuid leaf 0x1f to guest if there is only smt and core level.
> 
> Please, QEMU is emulating hardware not writing software.

what I want to conveyed was that, SDM is teaching software how to probe 
the cpu topology, not suggesting VMM how to advertise cpu topology to 
guest.


> Is there any
> reason why we shouldn't emulate new and generic hardware behaviors and
> stick with the old ones?

I didn't say we shouldn't, but we don't need to do it if it's unnecessary.

if cpuid 0x1f is advertised to guest by default, it will also introduce 
the inconsistence. Old product doesn't have cpuid 0x1f, but using QEMU 
to emualte an old product, it has.

sure we can have code to fix it, that only expose 0x1f to new enough cpu 
model. But it just make thing complicated.

>> because in this case, they are exactly the same in leaf 0xb and 0x1f. we don't
>> need to bother advertising the duplicate data.
> 
> You can't "define" the same 0x0b and 0x1f as duplicates. SDM doesn't
> have such the definition.

for QEMU, they are duplicate data that need to be maintained and need to 
be passed to KVM by KVM_SET_CPUID. For guest, it's also unnecessary, 
because it doesn't provide any additional information with cpuid leaf 1f.

SDM keeps cpuid 0xb is for backwards compatibility.

> Regards,
> Zhao
>
Zhao Liu Jan. 15, 2024, 7:20 a.m. UTC | #13
On Mon, Jan 15, 2024 at 02:57:30PM +0800, Yuan Yao wrote:
> Date: Mon, 15 Jan 2024 14:57:30 +0800
> From: Yuan Yao <yuan.yao@linux.intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On Mon, Jan 15, 2024 at 02:20:20PM +0800, Zhao Liu wrote:
> > On Mon, Jan 15, 2024 at 01:20:22PM +0800, Yuan Yao wrote:
> > > Date: Mon, 15 Jan 2024 13:20:22 +0800
> > > From: Yuan Yao <yuan.yao@linux.intel.com>
> > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > >
> > > Ah, so my understanding is incorrect on this.
> > >
> > > I tried on one raptor lake i5-i335U, which also hybrid soc but doesn't have
> > > module level, in this case 0x1f and 0xb have same values in core/lp level.
> >
> > Some socs have modules/dies but they don't expose them in 0x1f.
> 
> Here they don't expose because from hardware level they can't or possible
> software level configuration (i.e. disable some cores in bios) ?
>

This leaf is decided at hardware level. Whether or not which levels are exposed
sometimes depends if there is the topology-related feature, but there is no clear
rule (just as in the ADL family neither ADL-S/P exposes modules, while ADL-N
exposes modules).

Regards,
Zhao
Yuan Yao Jan. 15, 2024, 9:03 a.m. UTC | #14
On Mon, Jan 15, 2024 at 03:20:37PM +0800, Zhao Liu wrote:
> On Mon, Jan 15, 2024 at 02:57:30PM +0800, Yuan Yao wrote:
> > Date: Mon, 15 Jan 2024 14:57:30 +0800
> > From: Yuan Yao <yuan.yao@linux.intel.com>
> > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> >
> > On Mon, Jan 15, 2024 at 02:20:20PM +0800, Zhao Liu wrote:
> > > On Mon, Jan 15, 2024 at 01:20:22PM +0800, Yuan Yao wrote:
> > > > Date: Mon, 15 Jan 2024 13:20:22 +0800
> > > > From: Yuan Yao <yuan.yao@linux.intel.com>
> > > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > > >
> > > > Ah, so my understanding is incorrect on this.
> > > >
> > > > I tried on one raptor lake i5-i335U, which also hybrid soc but doesn't have
> > > > module level, in this case 0x1f and 0xb have same values in core/lp level.
> > >
> > > Some socs have modules/dies but they don't expose them in 0x1f.
> >
> > Here they don't expose because from hardware level they can't or possible
> > software level configuration (i.e. disable some cores in bios) ?
> >
>
> This leaf is decided at hardware level. Whether or not which levels are exposed
> sometimes depends if there is the topology-related feature, but there is no clear
> rule (just as in the ADL family neither ADL-S/P exposes modules, while ADL-N
> exposes modules).

I see, thanks for your information!

>
> Regards,
> Zhao
>
Zhao Liu Jan. 15, 2024, 3:46 p.m. UTC | #15
Hi Xiaoyao,

On Mon, Jan 15, 2024 at 03:16:43PM +0800, Xiaoyao Li wrote:
> Date: Mon, 15 Jan 2024 15:16:43 +0800
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> 
> On 1/15/2024 2:35 PM, Zhao Liu wrote:
> > On Mon, Jan 15, 2024 at 02:11:17PM +0800, Xiaoyao Li wrote:
> > > Date: Mon, 15 Jan 2024 14:11:17 +0800
> > > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > > 
> > > On 1/15/2024 2:12 PM, Zhao Liu wrote:
> > > > Hi Xiaoyao,
> > > > 
> > > > On Mon, Jan 15, 2024 at 12:34:12PM +0800, Xiaoyao Li wrote:
> > > > > Date: Mon, 15 Jan 2024 12:34:12 +0800
> > > > > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > > > > Subject: Re: [PATCH v7 08/16] i386: Expose module level in CPUID[0x1F]
> > > > > 
> > > > > > Yes, I think it's time to move to default 0x1f.
> > > > > 
> > > > > we don't need to do so until it's necessary.
> > > > 
> > > > Recent and future machines all support 0x1f, and at least SDM has
> > > > emphasized the preferred use of 0x1f.
> > > 
> > > The preference is the guideline for software e.g., OS. QEMU doesn't need to
> > > emulate cpuid leaf 0x1f to guest if there is only smt and core level.
> > 
> > Please, QEMU is emulating hardware not writing software.
> 
> what I want to conveyed was that, SDM is teaching software how to probe the
> cpu topology, not suggesting VMM how to advertise cpu topology to guest.
> 

This reflects the hardware's behavioral tendency. Additionally, due to SDM
related suggestion about 0x1f preference, lots of new software may rely on
0x1f, making 0x1f as the default enabling leaf helps to enhance Guest
compatibility.

> 
> > Is there any
> > reason why we shouldn't emulate new and generic hardware behaviors and
> > stick with the old ones?
> 
> I didn't say we shouldn't, but we don't need to do it if it's unnecessary.

Probably never going to deprecate 0x0b, and 0x1f is in fact replacing 0x0b,
kind of like a timing issue, when should 0x1f be enabled by default?

Maybe for some new CPU models or -host, we can start making it as default.
This eliminates the difference in the CPU topology enumeration interface
between Host and Guest. What do you think?

> 
> if cpuid 0x1f is advertised to guest by default, it will also introduce the
> inconsistence. Old product doesn't have cpuid 0x1f, but using QEMU to
> emualte an old product, it has.

Yes, this is the similar case as 0x0b. Old machine doens't has 0x0b. And
QEMU uses cpuid-0xb option to resolve compatibility issue.

> 
> sure we can have code to fix it, that only expose 0x1f to new enough cpu
> model. But it just make thing complicated.
> 
> > > because in this case, they are exactly the same in leaf 0xb and 0x1f. we don't
> > > need to bother advertising the duplicate data.
> > 
> > You can't "define" the same 0x0b and 0x1f as duplicates. SDM doesn't
> > have such the definition.
> 
> for QEMU, they are duplicate data that need to be maintained and need to be
> passed to KVM by KVM_SET_CPUID. For guest, it's also unnecessary, because it
> doesn't provide any additional information with cpuid leaf 1f.

I understand your concerns. The benefit is to follow the behavior of the
new hardware and spec recommendations, on new machines people are going
to be more accustomed to using 0x1f to get topology, and VMs on new
machines that don't have 0x1f will tend to get confused.

I could start by having a look at if we could synchronize Host in -host
to enable 0x1f. If there isn't too much block, -host is an acceptable
starting point, after all, there are no additional compatibility issues
for this case. ;-)

Thanks,
Zhao

> 
> SDM keeps cpuid 0xb is for backwards compatibility.
>
diff mbox series

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 294ca6b8947a..a2d39d2198b6 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -277,6 +277,8 @@  static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo *topo_info,
         return 1;
     case CPU_TOPO_LEVEL_CORE:
         return topo_info->threads_per_core;
+    case CPU_TOPO_LEVEL_MODULE:
+        return topo_info->threads_per_core * topo_info->cores_per_module;
     case CPU_TOPO_LEVEL_DIE:
         return topo_info->threads_per_core * topo_info->cores_per_module *
                topo_info->modules_per_die;
@@ -297,6 +299,8 @@  static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
         return 0;
     case CPU_TOPO_LEVEL_CORE:
         return apicid_core_offset(topo_info);
+    case CPU_TOPO_LEVEL_MODULE:
+        return apicid_module_offset(topo_info);
     case CPU_TOPO_LEVEL_DIE:
         return apicid_die_offset(topo_info);
     case CPU_TOPO_LEVEL_PACKAGE:
@@ -316,6 +320,8 @@  static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level)
         return CPUID_1F_ECX_TOPO_LEVEL_SMT;
     case CPU_TOPO_LEVEL_CORE:
         return CPUID_1F_ECX_TOPO_LEVEL_CORE;
+    case CPU_TOPO_LEVEL_MODULE:
+        return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
     case CPU_TOPO_LEVEL_DIE:
         return CPUID_1F_ECX_TOPO_LEVEL_DIE;
     default:
@@ -347,6 +353,10 @@  static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count,
         if (env->nr_dies > 1) {
             set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
         }
+
+        if (env->nr_modules > 1) {
+            set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
+        }
     }
 
     *ecx = count & 0xff;
@@ -6394,7 +6404,7 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x1F:
         /* V2 Extended Topology Enumeration Leaf */
-        if (topo_info.dies_per_pkg < 2) {
+        if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
             *eax = *ebx = *ecx = *edx = 0;
             break;
         }
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index eecd30bde92b..97b290e10576 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1018,6 +1018,7 @@  enum CPUTopoLevel {
     CPU_TOPO_LEVEL_INVALID,
     CPU_TOPO_LEVEL_SMT,
     CPU_TOPO_LEVEL_CORE,
+    CPU_TOPO_LEVEL_MODULE,
     CPU_TOPO_LEVEL_DIE,
     CPU_TOPO_LEVEL_PACKAGE,
     CPU_TOPO_LEVEL_MAX,
@@ -1032,6 +1033,7 @@  enum CPUTopoLevel {
 #define CPUID_1F_ECX_TOPO_LEVEL_INVALID  CPUID_B_ECX_TOPO_LEVEL_INVALID
 #define CPUID_1F_ECX_TOPO_LEVEL_SMT      CPUID_B_ECX_TOPO_LEVEL_SMT
 #define CPUID_1F_ECX_TOPO_LEVEL_CORE     CPUID_B_ECX_TOPO_LEVEL_CORE
+#define CPUID_1F_ECX_TOPO_LEVEL_MODULE   3
 #define CPUID_1F_ECX_TOPO_LEVEL_DIE      5
 
 /* MSR Feature Bits */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 4ce80555b45c..e5ddb214cb36 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1913,7 +1913,7 @@  int kvm_arch_init_vcpu(CPUState *cs)
             break;
         }
         case 0x1f:
-            if (env->nr_dies < 2) {
+            if (env->nr_modules < 2 && env->nr_dies < 2) {
                 break;
             }
             /* fallthrough */