diff mbox

ppc/pnv: fix cores per chip for multiple cpus

Message ID 87y3p7nugc.fsf@abhimanyu.i-did-not-set--mail-host-address--so-tickle-me (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Sept. 22, 2017, 6 a.m. UTC
David Gibson <david@gibson.dropbear.id.au> writes:

>> >> 
>> >> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>> >> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>> >> this:
>> >> 
>> >>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>> >> 
>> >> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>> >> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>> >> was 1), which is wrong as per the command-line that was provided.
>> >
>> > Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>> > thread.  If you can't supply 4 sockets you should error, but you
>> > shouldn't go and change the number of cores per socket.
>> 
>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>> case. Now looking more into it, i see that powernv has something called
>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>
> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
> an internal variable, rather than using (smp_cpus / smp_cores /
> smp_threads) everywhere.  But we shouldn't have it as a direct
> user-settable property, instead setting it from the -smp command line
> option.

Something like the below works till num_chips=2, after that guest does
not boot up. This might be some limitation within the OS, Cedric might
have some clue. Otherwise, I see that multiple chips are created with
single core having single thread.

    ppc/pnv: Use num_chips for multiple sockets
    
    When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
    initialize 4 cpus. QEMU assumes smp_threads and smp_cores both as 1. Make sure
    that we initialize multiple chips for this.
    
    Remove the user-settable property num_chips from machine command line option
    
    Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

Comments

Cédric Le Goater Sept. 22, 2017, 6:07 a.m. UTC | #1
On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
>>>>>
>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>>>> this:
>>>>>
>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>>>
>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>>>> was 1), which is wrong as per the command-line that was provided.
>>>>
>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>>>> thread.  If you can't supply 4 sockets you should error, but you
>>>> shouldn't go and change the number of cores per socket.
>>>
>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>>> case. Now looking more into it, i see that powernv has something called
>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>>
>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
>> an internal variable, rather than using (smp_cpus / smp_cores /
>> smp_threads) everywhere.  But we shouldn't have it as a direct
>> user-settable property, instead setting it from the -smp command line
>> option.
> 
> Something like the below works till num_chips=2, after that guest does
> not boot up. This might be some limitation within the OS, Cedric might
> have some clue.

Some controllers might need some more tweaking, PSI, LPC, to elect a 
master one. Anyhow I don't think we want to deduce the number of chips
from the number of cpus. These two figures are very different.

C.

> Otherwise, I see that multiple chips are created with single core having 
> single thread.
>
>     ppc/pnv: Use num_chips for multiple sockets
>     
>     When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
>     initialize 4 cpus. QEMU assumes smp_threads and smp_cores both as 1. Make sure
>     that we initialize multiple chips for this.
>     
>     Remove the user-settable property num_chips from machine command line option
>     
>     Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 9724719..fa501f9 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -709,7 +709,17 @@ static void ppc_powernv_init(MachineState *machine)
>          exit(1);
>      }
>  
> +    pnv->num_chips = smp_cpus / (smp_cores * smp_threads);
>      pnv->chips = g_new0(PnvChip *, pnv->num_chips);
> +
> +    if (smp_cpus != (smp_threads * pnv->num_chips * smp_cores)) {
> +        error_report("cpu topology not balanced: "
> +                     "chips (%u) * cores (%u) * threads (%u) != "
> +                     "number of cpus (%u)",
> +                     pnv->num_chips, smp_cores, smp_threads, smp_cpus);
> +        exit(1);
> +    }
> +
>      for (i = 0; i < pnv->num_chips; i++) {
>          char chip_name[32];
>          Object *chip = object_new(chip_typename);
> @@ -1255,53 +1265,12 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
>      }
>  }
>  
> -static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
> -                              void *opaque, Error **errp)
> -{
> -    visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp);
> -}
> -
> -static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
> -                              void *opaque, Error **errp)
> -{
> -    PnvMachineState *pnv = POWERNV_MACHINE(obj);
> -    uint32_t num_chips;
> -    Error *local_err = NULL;
> -
> -    visit_type_uint32(v, name, &num_chips, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> -        return;
> -    }
> -
> -    /*
> -     * TODO: should we decide on how many chips we can create based
> -     * on #cores and Venice vs. Murano vs. Naples chip type etc...,
> -     */
> -    if (!is_power_of_2(num_chips) || num_chips > 4) {
> -        error_setg(errp, "invalid number of chips: '%d'", num_chips);
> -        return;
> -    }
> -
> -    pnv->num_chips = num_chips;
> -}
> -
>  static void powernv_machine_initfn(Object *obj)
>  {
>      PnvMachineState *pnv = POWERNV_MACHINE(obj);
>      pnv->num_chips = 1;
>  }
>  
> -static void powernv_machine_class_props_init(ObjectClass *oc)
> -{
> -    object_class_property_add(oc, "num-chips", "uint32",
> -                              pnv_get_num_chips, pnv_set_num_chips,
> -                              NULL, NULL, NULL);
> -    object_class_property_set_description(oc, "num-chips",
> -                              "Specifies the number of processor chips",
> -                              NULL);
> -}
> -
>  static void powernv_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1321,8 +1290,6 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data)
>      xic->ics_get = pnv_ics_get;
>      xic->ics_resend = pnv_ics_resend;
>      ispc->print_info = pnv_pic_print_info;
> -
> -    powernv_machine_class_props_init(oc);
>  }
>  
>  static const TypeInfo powernv_machine_info = {
>
David Gibson Sept. 22, 2017, 10:12 a.m. UTC | #2
On Fri, Sep 22, 2017 at 08:07:06AM +0200, Cédric Le Goater wrote:
> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
> > David Gibson <david@gibson.dropbear.id.au> writes:
> > 
> >>>>>
> >>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >>>>> this:
> >>>>>
> >>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >>>>>
> >>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >>>>> was 1), which is wrong as per the command-line that was provided.
> >>>>
> >>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> >>>> thread.  If you can't supply 4 sockets you should error, but you
> >>>> shouldn't go and change the number of cores per socket.
> >>>
> >>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> >>> case. Now looking more into it, i see that powernv has something called
> >>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
> >>
> >> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
> >> an internal variable, rather than using (smp_cpus / smp_cores /
> >> smp_threads) everywhere.  But we shouldn't have it as a direct
> >> user-settable property, instead setting it from the -smp command line
> >> option.
> > 
> > Something like the below works till num_chips=2, after that guest does
> > not boot up. This might be some limitation within the OS, Cedric might
> > have some clue.
> 
> Some controllers might need some more tweaking, PSI, LPC, to elect a 
> master one.

Uh.. why?

> Anyhow I don't think we want to deduce the number of chips
> from the number of cpus. These two figures are very different.

How so?  It's not totally perfect, but making a single chip correspond
to a "socket" in qemu's (somewhat x86 centric) terminology is still a
pretty good match.
Cédric Le Goater Sept. 22, 2017, 10:46 a.m. UTC | #3
On 09/22/2017 12:12 PM, David Gibson wrote:
> On Fri, Sep 22, 2017 at 08:07:06AM +0200, Cédric Le Goater wrote:
>> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>
>>>>>>>
>>>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>>>>>> this:
>>>>>>>
>>>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>>>>>
>>>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>>>>>> was 1), which is wrong as per the command-line that was provided.
>>>>>>
>>>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>>>>>> thread.  If you can't supply 4 sockets you should error, but you
>>>>>> shouldn't go and change the number of cores per socket.
>>>>>
>>>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>>>>> case. Now looking more into it, i see that powernv has something called
>>>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>>>>
>>>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
>>>> an internal variable, rather than using (smp_cpus / smp_cores /
>>>> smp_threads) everywhere.  But we shouldn't have it as a direct
>>>> user-settable property, instead setting it from the -smp command line
>>>> option.
>>>
>>> Something like the below works till num_chips=2, after that guest does
>>> not boot up. This might be some limitation within the OS, Cedric might
>>> have some clue.
>>
>> Some controllers might need some more tweaking, PSI, LPC, to elect a 
>> master one.
> 
> Uh.. why?

that's not true. I managed to boot a pnv machine with 4 chips/sockets 
each having 4 cpus using a 4.4.9-openpower2 skiroot kernel, from an 
openpower firmare 1.10 I think. Recent openpower kernel must be using 
some new features/instructions that we don't manage well in QEMU.

I would need to build a kernel with more output.

>> Anyhow I don't think we want to deduce the number of chips
>> from the number of cpus. These two figures are very different.
> 
> How so?  It's not totally perfect, but making a single chip correspond
> to a "socket" in qemu's (somewhat x86 centric) terminology is still a
> pretty good match.

well, it would be good to be able to define chips with different
numbers of cpus. That is something will we want to do for sure.

Thanks,

C.
Cédric Le Goater Sept. 22, 2017, 10:56 a.m. UTC | #4
On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
>>>>>
>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>>>> this:
>>>>>
>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>>>
>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>>>> was 1), which is wrong as per the command-line that was provided.
>>>>
>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>>>> thread.  If you can't supply 4 sockets you should error, but you
>>>> shouldn't go and change the number of cores per socket.
>>>
>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>>> case. Now looking more into it, i see that powernv has something called
>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>>
>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
>> an internal variable, rather than using (smp_cpus / smp_cores /
>> smp_threads) everywhere.  But we shouldn't have it as a direct
>> user-settable property, instead setting it from the -smp command line
>> option.
> 
> Something like the below works till num_chips=2, after that guest does
> not boot up. This might be some limitation within the OS, Cedric might
> have some clue. Otherwise, I see that multiple chips are created with
> single core having single thread.
> 
>     ppc/pnv: Use num_chips for multiple sockets
>     
>     When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
>     initialize 4 cpus. QEMU assumes smp_threads and smp_cores both as 1. Make sure
>     that we initialize multiple chips for this.

-smp 4          would give a machine with 4 sockets with 1 core
-smp 4,cores=4  would give a machine with 1 socket  with 4 cores

correct ?

C.

>     Remove the user-settable property num_chips from machine command line option
>     
>     Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 9724719..fa501f9 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -709,7 +709,17 @@ static void ppc_powernv_init(MachineState *machine)
>          exit(1);
>      }
>  
> +    pnv->num_chips = smp_cpus / (smp_cores * smp_threads);
>      pnv->chips = g_new0(PnvChip *, pnv->num_chips);
> +
> +    if (smp_cpus != (smp_threads * pnv->num_chips * smp_cores)) {
> +        error_report("cpu topology not balanced: "
> +                     "chips (%u) * cores (%u) * threads (%u) != "
> +                     "number of cpus (%u)",
> +                     pnv->num_chips, smp_cores, smp_threads, smp_cpus);
> +        exit(1);
> +    }
> +
>      for (i = 0; i < pnv->num_chips; i++) {
>          char chip_name[32];
>          Object *chip = object_new(chip_typename);
> @@ -1255,53 +1265,12 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj,
>      }
>  }
>  
> -static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
> -                              void *opaque, Error **errp)
> -{
> -    visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp);
> -}
> -
> -static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
> -                              void *opaque, Error **errp)
> -{
> -    PnvMachineState *pnv = POWERNV_MACHINE(obj);
> -    uint32_t num_chips;
> -    Error *local_err = NULL;
> -
> -    visit_type_uint32(v, name, &num_chips, &local_err);
> -    if (local_err) {
> -        error_propagate(errp, local_err);
> -        return;
> -    }
> -
> -    /*
> -     * TODO: should we decide on how many chips we can create based
> -     * on #cores and Venice vs. Murano vs. Naples chip type etc...,
> -     */
> -    if (!is_power_of_2(num_chips) || num_chips > 4) {
> -        error_setg(errp, "invalid number of chips: '%d'", num_chips);
> -        return;
> -    }
> -
> -    pnv->num_chips = num_chips;
> -}
> -
>  static void powernv_machine_initfn(Object *obj)
>  {
>      PnvMachineState *pnv = POWERNV_MACHINE(obj);
>      pnv->num_chips = 1;
>  }
>  
> -static void powernv_machine_class_props_init(ObjectClass *oc)
> -{
> -    object_class_property_add(oc, "num-chips", "uint32",
> -                              pnv_get_num_chips, pnv_set_num_chips,
> -                              NULL, NULL, NULL);
> -    object_class_property_set_description(oc, "num-chips",
> -                              "Specifies the number of processor chips",
> -                              NULL);
> -}
> -
>  static void powernv_machine_class_init(ObjectClass *oc, void *data)
>  {
>      MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1321,8 +1290,6 @@ static void powernv_machine_class_init(ObjectClass *oc, void *data)
>      xic->ics_get = pnv_ics_get;
>      xic->ics_resend = pnv_ics_resend;
>      ispc->print_info = pnv_pic_print_info;
> -
> -    powernv_machine_class_props_init(oc);
>  }
>  
>  static const TypeInfo powernv_machine_info = {
>
Nikunj A. Dadhania Sept. 22, 2017, 11:06 a.m. UTC | #5
Cédric Le Goater <clg@kaod.org> writes:

> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>>>>>>
>>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>>>>> this:
>>>>>>
>>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>>>>
>>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>>>>> was 1), which is wrong as per the command-line that was provided.
>>>>>
>>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>>>>> thread.  If you can't supply 4 sockets you should error, but you
>>>>> shouldn't go and change the number of cores per socket.
>>>>
>>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>>>> case. Now looking more into it, i see that powernv has something called
>>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>>>
>>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
>>> an internal variable, rather than using (smp_cpus / smp_cores /
>>> smp_threads) everywhere.  But we shouldn't have it as a direct
>>> user-settable property, instead setting it from the -smp command line
>>> option.
>> 
>> Something like the below works till num_chips=2, after that guest does
>> not boot up. This might be some limitation within the OS, Cedric might
>> have some clue. Otherwise, I see that multiple chips are created with
>> single core having single thread.
>> 
>>     ppc/pnv: Use num_chips for multiple sockets
>>     
>>     When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
>>     initialize 4 cpus. QEMU assumes smp_threads and smp_cores both as 1. Make sure
>>     that we initialize multiple chips for this.
>
> -smp 4          would give a machine with 4 sockets with 1 core
> -smp 4,cores=4  would give a machine with 1 socket  with 4 cores
>
> correct ?

Yes

Regards
Nikunj
David Gibson Sept. 22, 2017, 11:20 a.m. UTC | #6
On Fri, Sep 22, 2017 at 12:46:58PM +0200, Cédric Le Goater wrote:
> On 09/22/2017 12:12 PM, David Gibson wrote:
> > On Fri, Sep 22, 2017 at 08:07:06AM +0200, Cédric Le Goater wrote:
> >> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
> >>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>
> >>>>>>>
> >>>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >>>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >>>>>>> this:
> >>>>>>>
> >>>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >>>>>>>
> >>>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >>>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >>>>>>> was 1), which is wrong as per the command-line that was provided.
> >>>>>>
> >>>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> >>>>>> thread.  If you can't supply 4 sockets you should error, but you
> >>>>>> shouldn't go and change the number of cores per socket.
> >>>>>
> >>>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> >>>>> case. Now looking more into it, i see that powernv has something called
> >>>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
> >>>>
> >>>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
> >>>> an internal variable, rather than using (smp_cpus / smp_cores /
> >>>> smp_threads) everywhere.  But we shouldn't have it as a direct
> >>>> user-settable property, instead setting it from the -smp command line
> >>>> option.
> >>>
> >>> Something like the below works till num_chips=2, after that guest does
> >>> not boot up. This might be some limitation within the OS, Cedric might
> >>> have some clue.
> >>
> >> Some controllers might need some more tweaking, PSI, LPC, to elect a 
> >> master one.
> > 
> > Uh.. why?
> 
> that's not true. I managed to boot a pnv machine with 4 chips/sockets 
> each having 4 cpus using a 4.4.9-openpower2 skiroot kernel, from an 
> openpower firmare 1.10 I think. Recent openpower kernel must be using 
> some new features/instructions that we don't manage well in QEMU.
> 
> I would need to build a kernel with more output.
> 
> >> Anyhow I don't think we want to deduce the number of chips
> >> from the number of cpus. These two figures are very different.
> > 
> > How so?  It's not totally perfect, but making a single chip correspond
> > to a "socket" in qemu's (somewhat x86 centric) terminology is still a
> > pretty good match.
> 
> well, it would be good to be able to define chips with different
> numbers of cpus. That is something will we want to do for sure.

You mean multiple chips in a single system with non-uniform numbers of
cores?  Are there really such systems in the wild?
David Gibson Sept. 22, 2017, 11:36 a.m. UTC | #7
On Fri, Sep 22, 2017 at 12:56:40PM +0200, Cédric Le Goater wrote:
> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:
> > David Gibson <david@gibson.dropbear.id.au> writes:
> > 
> >>>>>
> >>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >>>>> this:
> >>>>>
> >>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >>>>>
> >>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >>>>> was 1), which is wrong as per the command-line that was provided.
> >>>>
> >>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> >>>> thread.  If you can't supply 4 sockets you should error, but you
> >>>> shouldn't go and change the number of cores per socket.
> >>>
> >>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> >>> case. Now looking more into it, i see that powernv has something called
> >>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
> >>
> >> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
> >> an internal variable, rather than using (smp_cpus / smp_cores /
> >> smp_threads) everywhere.  But we shouldn't have it as a direct
> >> user-settable property, instead setting it from the -smp command line
> >> option.
> > 
> > Something like the below works till num_chips=2, after that guest does
> > not boot up. This might be some limitation within the OS, Cedric might
> > have some clue. Otherwise, I see that multiple chips are created with
> > single core having single thread.
> > 
> >     ppc/pnv: Use num_chips for multiple sockets
> >     
> >     When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
> >     initialize 4 cpus. QEMU assumes smp_threads and smp_cores both as 1. Make sure
> >     that we initialize multiple chips for this.
> 
> -smp 4          would give a machine with 4 sockets with 1 core
> -smp 4,cores=4  would give a machine with 1 socket  with 4 cores
> 
> correct ?

That's right.
Cédric Le Goater Sept. 22, 2017, 11:37 a.m. UTC | #8
>> well, it would be good to be able to define chips with different
>> numbers of cpus. That is something will we want to do for sure.
> 
> You mean multiple chips in a single system with non-uniform numbers of
> cores?  Are there really such systems in the wild?
> 

When CPU fail for some reason, yes. They are garded by the FW and next
boot the system will have different numbers of CPUs on its chip.

C.
Greg Kurz Sept. 22, 2017, 11:46 a.m. UTC | #9
On Fri, 22 Sep 2017 21:20:42 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Sep 22, 2017 at 12:46:58PM +0200, Cédric Le Goater wrote:
> > On 09/22/2017 12:12 PM, David Gibson wrote:  
> > > On Fri, Sep 22, 2017 at 08:07:06AM +0200, Cédric Le Goater wrote:  
> > >> On 09/22/2017 08:00 AM, Nikunj A Dadhania wrote:  
> > >>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>  
> > >>>>>>>
> > >>>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> > >>>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> > >>>>>>> this:
> > >>>>>>>
> > >>>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> > >>>>>>>
> > >>>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> > >>>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> > >>>>>>> was 1), which is wrong as per the command-line that was provided.  
> > >>>>>>
> > >>>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> > >>>>>> thread.  If you can't supply 4 sockets you should error, but you
> > >>>>>> shouldn't go and change the number of cores per socket.  
> > >>>>>
> > >>>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> > >>>>> case. Now looking more into it, i see that powernv has something called
> > >>>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?  
> > >>>>
> > >>>> Ah, yes, I see.  It's probably still reasonable to keep num_chips as
> > >>>> an internal variable, rather than using (smp_cpus / smp_cores /
> > >>>> smp_threads) everywhere.  But we shouldn't have it as a direct
> > >>>> user-settable property, instead setting it from the -smp command line
> > >>>> option.  
> > >>>
> > >>> Something like the below works till num_chips=2, after that guest does
> > >>> not boot up. This might be some limitation within the OS, Cedric might
> > >>> have some clue.  
> > >>
> > >> Some controllers might need some more tweaking, PSI, LPC, to elect a 
> > >> master one.  
> > > 
> > > Uh.. why?  
> > 
> > that's not true. I managed to boot a pnv machine with 4 chips/sockets 
> > each having 4 cpus using a 4.4.9-openpower2 skiroot kernel, from an 
> > openpower firmare 1.10 I think. Recent openpower kernel must be using 
> > some new features/instructions that we don't manage well in QEMU.
> > 
> > I would need to build a kernel with more output.
> >   
> > >> Anyhow I don't think we want to deduce the number of chips
> > >> from the number of cpus. These two figures are very different.  
> > > 
> > > How so?  It's not totally perfect, but making a single chip correspond
> > > to a "socket" in qemu's (somewhat x86 centric) terminology is still a
> > > pretty good match.  
> > 
> > well, it would be good to be able to define chips with different
> > numbers of cpus. That is something will we want to do for sure.  
> 
> You mean multiple chips in a single system with non-uniform numbers of
> cores?  Are there really such systems in the wild?
> 

Doesn't it happen when a CPU core gets deconfigured ?
David Gibson Sept. 22, 2017, 11:49 a.m. UTC | #10
On Fri, Sep 22, 2017 at 01:37:39PM +0200, Cédric Le Goater wrote:
> >> well, it would be good to be able to define chips with different
> >> numbers of cpus. That is something will we want to do for sure.
> > 
> > You mean multiple chips in a single system with non-uniform numbers of
> > cores?  Are there really such systems in the wild?
> > 
> 
> When CPU fail for some reason, yes. They are garded by the FW and next
> boot the system will have different numbers of CPUs on its chip.

Ok, but you can model that with max_cpus the full potential number,
then just don't have all of them on initially.  The basic topology
remains uniform.
diff mbox

Patch

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 9724719..fa501f9 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -709,7 +709,17 @@  static void ppc_powernv_init(MachineState *machine)
         exit(1);
     }
 
+    pnv->num_chips = smp_cpus / (smp_cores * smp_threads);
     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
+
+    if (smp_cpus != (smp_threads * pnv->num_chips * smp_cores)) {
+        error_report("cpu topology not balanced: "
+                     "chips (%u) * cores (%u) * threads (%u) != "
+                     "number of cpus (%u)",
+                     pnv->num_chips, smp_cores, smp_threads, smp_cpus);
+        exit(1);
+    }
+
     for (i = 0; i < pnv->num_chips; i++) {
         char chip_name[32];
         Object *chip = object_new(chip_typename);
@@ -1255,53 +1265,12 @@  static void pnv_pic_print_info(InterruptStatsProvider *obj,
     }
 }
 
-static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
-                              void *opaque, Error **errp)
-{
-    visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp);
-}
-
-static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
-                              void *opaque, Error **errp)
-{
-    PnvMachineState *pnv = POWERNV_MACHINE(obj);
-    uint32_t num_chips;
-    Error *local_err = NULL;
-
-    visit_type_uint32(v, name, &num_chips, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    /*
-     * TODO: should we decide on how many chips we can create based
-     * on #cores and Venice vs. Murano vs. Naples chip type etc...,
-     */
-    if (!is_power_of_2(num_chips) || num_chips > 4) {
-        error_setg(errp, "invalid number of chips: '%d'", num_chips);
-        return;
-    }
-
-    pnv->num_chips = num_chips;
-}
-
 static void powernv_machine_initfn(Object *obj)
 {
     PnvMachineState *pnv = POWERNV_MACHINE(obj);
     pnv->num_chips = 1;
 }
 
-static void powernv_machine_class_props_init(ObjectClass *oc)
-{
-    object_class_property_add(oc, "num-chips", "uint32",
-                              pnv_get_num_chips, pnv_set_num_chips,
-                              NULL, NULL, NULL);
-    object_class_property_set_description(oc, "num-chips",
-                              "Specifies the number of processor chips",
-                              NULL);
-}
-
 static void powernv_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1321,8 +1290,6 @@  static void powernv_machine_class_init(ObjectClass *oc, void *data)
     xic->ics_get = pnv_ics_get;
     xic->ics_resend = pnv_ics_resend;
     ispc->print_info = pnv_pic_print_info;
-
-    powernv_machine_class_props_init(oc);
 }
 
 static const TypeInfo powernv_machine_info = {