diff mbox series

cpu: Initialize nr_cores and nr_threads in cpu_common_initfn()

Message ID 20241122160317.4070177-1-xiaoyao.li@intel.com (mailing list archive)
State New
Headers show
Series cpu: Initialize nr_cores and nr_threads in cpu_common_initfn() | expand

Commit Message

Xiaoyao Li Nov. 22, 2024, 4:03 p.m. UTC
Currently cpu->nr_cores and cpu->nr_threads are initialized in
qemu_init_vcpu(), which is called a bit late in *cpu_realizefn() for
each ARCHes.

x86 arch would like to use nr_cores and nr_threads earlier in its
realizefn(). To serve this purpose, initialize nr_cores and nr_threads
in cpu_common_initfn(), which is earlier than *cpu_realizefn().

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 hw/core/cpu-common.c | 10 +++++++++-
 system/cpus.c        |  4 ----
 2 files changed, 9 insertions(+), 5 deletions(-)

Comments

Philippe Mathieu-Daudé Nov. 22, 2024, 5:26 p.m. UTC | #1
On 22/11/24 17:03, Xiaoyao Li wrote:
> Currently cpu->nr_cores and cpu->nr_threads are initialized in
> qemu_init_vcpu(), which is called a bit late in *cpu_realizefn() for
> each ARCHes.
> 
> x86 arch would like to use nr_cores and nr_threads earlier in its
> realizefn(). To serve this purpose, initialize nr_cores and nr_threads
> in cpu_common_initfn(), which is earlier than *cpu_realizefn().
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>   hw/core/cpu-common.c | 10 +++++++++-
>   system/cpus.c        |  4 ----
>   2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
> index 09c79035949b..6de92ed854bc 100644
> --- a/hw/core/cpu-common.c
> +++ b/hw/core/cpu-common.c
> @@ -237,14 +237,22 @@ static void cpu_common_unrealizefn(DeviceState *dev)
>   static void cpu_common_initfn(Object *obj)
>   {
>       CPUState *cpu = CPU(obj);
> +    Object *machine = qdev_get_machine();
> +    MachineState *ms;
>   
>       gdb_init_cpu(cpu);
>       cpu->cpu_index = UNASSIGNED_CPU_INDEX;
>       cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
>       /* user-mode doesn't have configurable SMP topology */
> -    /* the default value is changed by qemu_init_vcpu() for system-mode */
>       cpu->nr_cores = 1;
>       cpu->nr_threads = 1;
> +#ifndef CONFIG_USER_ONLY

Is CONFIG_USER_ONLY available in an common_ss[] object? I don't recall.

Anyway, can we not use CONFIG_USER_ONLY in cpu-common.c?

> +    if (object_dynamic_cast(machine, TYPE_MACHINE)) {
> +        ms = MACHINE(machine);
> +        cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
> +        cpu->nr_threads = ms->smp.threads;
> +    }
> +#endif
>       cpu->cflags_next_tb = -1;
>   
>       /* allocate storage for thread info, initialise condition variables */
> diff --git a/system/cpus.c b/system/cpus.c
> index 1c818ff6828c..c1547fbfd39b 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -664,10 +664,6 @@ const AccelOpsClass *cpus_get_accel(void)
>   
>   void qemu_init_vcpu(CPUState *cpu)
>   {
> -    MachineState *ms = MACHINE(qdev_get_machine());
> -
> -    cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
> -    cpu->nr_threads =  ms->smp.threads;
>       cpu->stopped = true;
>       cpu->random_seed = qemu_guest_random_seed_thread_part1();
>
Richard Henderson Nov. 22, 2024, 7:17 p.m. UTC | #2
On 11/22/24 11:26, Philippe Mathieu-Daudé wrote:
> On 22/11/24 17:03, Xiaoyao Li wrote:
>> Currently cpu->nr_cores and cpu->nr_threads are initialized in
>> qemu_init_vcpu(), which is called a bit late in *cpu_realizefn() for
>> each ARCHes.
>>
>> x86 arch would like to use nr_cores and nr_threads earlier in its
>> realizefn(). To serve this purpose, initialize nr_cores and nr_threads
>> in cpu_common_initfn(), which is earlier than *cpu_realizefn().
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>>   hw/core/cpu-common.c | 10 +++++++++-
>>   system/cpus.c        |  4 ----
>>   2 files changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
>> index 09c79035949b..6de92ed854bc 100644
>> --- a/hw/core/cpu-common.c
>> +++ b/hw/core/cpu-common.c
>> @@ -237,14 +237,22 @@ static void cpu_common_unrealizefn(DeviceState *dev)
>>   static void cpu_common_initfn(Object *obj)
>>   {
>>       CPUState *cpu = CPU(obj);
>> +    Object *machine = qdev_get_machine();
>> +    MachineState *ms;
>>       gdb_init_cpu(cpu);
>>       cpu->cpu_index = UNASSIGNED_CPU_INDEX;
>>       cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
>>       /* user-mode doesn't have configurable SMP topology */
>> -    /* the default value is changed by qemu_init_vcpu() for system-mode */
>>       cpu->nr_cores = 1;
>>       cpu->nr_threads = 1;
>> +#ifndef CONFIG_USER_ONLY
> 
> Is CONFIG_USER_ONLY available in an common_ss[] object? I don't recall.

No.


r~
Igor Mammedov Nov. 25, 2024, 9:38 a.m. UTC | #3
On Fri, 22 Nov 2024 11:03:17 -0500
Xiaoyao Li <xiaoyao.li@intel.com> wrote:

> Currently cpu->nr_cores and cpu->nr_threads are initialized in
> qemu_init_vcpu(), which is called a bit late in *cpu_realizefn() for
> each ARCHes.
> 
> x86 arch would like to use nr_cores and nr_threads earlier in its
> realizefn(). To serve this purpose, initialize nr_cores and nr_threads
> in cpu_common_initfn(), which is earlier than *cpu_realizefn().
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>  hw/core/cpu-common.c | 10 +++++++++-
>  system/cpus.c        |  4 ----
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
> index 09c79035949b..6de92ed854bc 100644
> --- a/hw/core/cpu-common.c
> +++ b/hw/core/cpu-common.c
> @@ -237,14 +237,22 @@ static void cpu_common_unrealizefn(DeviceState *dev)
>  static void cpu_common_initfn(Object *obj)
>  {
>      CPUState *cpu = CPU(obj);
> +    Object *machine = qdev_get_machine();
> +    MachineState *ms;
>  
>      gdb_init_cpu(cpu);
>      cpu->cpu_index = UNASSIGNED_CPU_INDEX;
>      cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
>      /* user-mode doesn't have configurable SMP topology */
> -    /* the default value is changed by qemu_init_vcpu() for system-mode */
>      cpu->nr_cores = 1;
>      cpu->nr_threads = 1;
> +#ifndef CONFIG_USER_ONLY
> +    if (object_dynamic_cast(machine, TYPE_MACHINE)) {
> +        ms = MACHINE(machine);
> +        cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
> +        cpu->nr_threads = ms->smp.threads;
> +    }
> +#endif

Can't say, that I'm fond of adding/moving hack to access MachineState
from CPU context. Granted we did/still do it elsewhere, But I'd rather
prefer getting rid of those remnants that access globals.
It's basically technical debt we are carrying since 2009 (dc6b1c09849).
Moving that around doesn't help with getting rid of arbitrary access to globals.

As Paolo've noted there are other ways to set cores/threads,
albeit at expense of adding more code. And that could be fine
if it's done within expected cpu initialization flow.

Instead of accessing MachineState directly from CPU code (which is
essentially a layer violation), I'd suggest to set cores_nr/threads_nr
from pre_plug handler (which is machine code).
We do similar thing for nr_dies/nr_modules already, and we should do
same for cores/trheads.

Quick hack would be do the same for cores/threads in x86_cpu_pre_plug(),
and make qemu_init_vcpu() conditional to avoid touching other targets/machines.

I'd even ack that, however that's just leaves us with the same
old technical debt. So I'd like to coax a promise to fix it properly
(i.e. get rid of access to machine from CPU code).

(here goes typical ask to rewrite whole QEMU before doing thing you
actually need)

To do that we would need to:
  1. audit usage of cpu->nr_cores/cpu->nr_threads across QEMU, to figure out
     what targets/machines need them
  2. then add pre_plug() handlers to those machines to set them.
  3. In the end get rid of initializing them in cpu_common_initfn().

With that done we can then add a common helper to generalize topo config
based on -smp from pre_plug() handlers to reduce duplication caused by
per machine pre_plug handlers.

Or introduce a generic cpu_pre_plug() handler at Machine level and make
_pre_plug call chain to call it (sort of what we do with nested realize calls);

I'd prefer the 1st option (#2) as it explicitly documents what
targets/machines care about cores/threads at expense of some boiler plate code
duplication, instead of blanket generic fallback like we have now (regardless of
if it's actually needed).

>      cpu->cflags_next_tb = -1;
>  
>      /* allocate storage for thread info, initialise condition variables */
> diff --git a/system/cpus.c b/system/cpus.c
> index 1c818ff6828c..c1547fbfd39b 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -664,10 +664,6 @@ const AccelOpsClass *cpus_get_accel(void)
>  
>  void qemu_init_vcpu(CPUState *cpu)
>  {
> -    MachineState *ms = MACHINE(qdev_get_machine());
> -
> -    cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
> -    cpu->nr_threads =  ms->smp.threads;
>      cpu->stopped = true;
>      cpu->random_seed = qemu_guest_random_seed_thread_part1();
>
diff mbox series

Patch

diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 09c79035949b..6de92ed854bc 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -237,14 +237,22 @@  static void cpu_common_unrealizefn(DeviceState *dev)
 static void cpu_common_initfn(Object *obj)
 {
     CPUState *cpu = CPU(obj);
+    Object *machine = qdev_get_machine();
+    MachineState *ms;
 
     gdb_init_cpu(cpu);
     cpu->cpu_index = UNASSIGNED_CPU_INDEX;
     cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
     /* user-mode doesn't have configurable SMP topology */
-    /* the default value is changed by qemu_init_vcpu() for system-mode */
     cpu->nr_cores = 1;
     cpu->nr_threads = 1;
+#ifndef CONFIG_USER_ONLY
+    if (object_dynamic_cast(machine, TYPE_MACHINE)) {
+        ms = MACHINE(machine);
+        cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
+        cpu->nr_threads = ms->smp.threads;
+    }
+#endif
     cpu->cflags_next_tb = -1;
 
     /* allocate storage for thread info, initialise condition variables */
diff --git a/system/cpus.c b/system/cpus.c
index 1c818ff6828c..c1547fbfd39b 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -664,10 +664,6 @@  const AccelOpsClass *cpus_get_accel(void)
 
 void qemu_init_vcpu(CPUState *cpu)
 {
-    MachineState *ms = MACHINE(qdev_get_machine());
-
-    cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
-    cpu->nr_threads =  ms->smp.threads;
     cpu->stopped = true;
     cpu->random_seed = qemu_guest_random_seed_thread_part1();