diff mbox

ppc/pnv: fix cores per chip for multiple cpus

Message ID 20170906082748.28520-1-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Sept. 6, 2017, 8:27 a.m. UTC
When the user does not provide the cpu topology, e.g. "-smp 4", the machine fails to
initialize 4 cpus. Compute the cores per chip depending on the number of chips
and smt threads.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/pnv.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

Comments

Cédric Le Goater Sept. 8, 2017, 6:38 a.m. UTC | #1
On 09/06/2017 10:27 AM, Nikunj A Dadhania wrote:
> When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
> initialize 4 cpus. Compute the chip per cores depending on the number of chips
> and smt threads.

I think we could also use the '-numa' options to define the cpus
per chip but this patch defines a good default layout and fixes 
an issue.  

> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

Reviewed-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>

I pushed the patch under the latest powernv tree :

	https://github.com/legoater/qemu.git tags/powernv-2.10

Thanks,

C.

> ---
>  hw/ppc/pnv.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 9724719..3fbaafb 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -642,7 +642,7 @@ static void ppc_powernv_init(MachineState *machine)
>      MemoryRegion *ram;
>      char *fw_filename;
>      long fw_size;
> -    int i;
> +    int i, cores_per_chip;
>      char *chip_typename;
>      PCIBus *pbus;
>      bool has_gfx = false;
> @@ -710,6 +710,22 @@ static void ppc_powernv_init(MachineState *machine)
>      }
>  
>      pnv->chips = g_new0(PnvChip *, pnv->num_chips);
> +
> +    /* If user has specified number of cores, use it. Otherwise, compute it. */
> +    if (smp_cores != 1) {
> +        cores_per_chip = smp_cores;
> +    } else {
> +        cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> +    }
> +
> +    if (smp_cpus != (smp_threads * pnv->num_chips * cores_per_chip)) {
> +        error_report("cpu topology not balanced: "
> +                     "chips (%u) * cores (%u) * threads (%u) != "
> +                     "number of cpus (%u)",
> +                     pnv->num_chips, cores_per_chip, smp_threads, smp_cpus);
> +        exit(1);
> +    }
> +
>      for (i = 0; i < pnv->num_chips; i++) {
>          char chip_name[32];
>          Object *chip = object_new(chip_typename);
> @@ -728,7 +744,7 @@ static void ppc_powernv_init(MachineState *machine)
>          object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
>          object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
>                                  &error_fatal);
> -        object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> +        object_property_set_int(chip, cores_per_chip, "nr-cores", &error_fatal);
>          object_property_set_int(chip, 1, "num-phbs", &error_fatal);
>          object_property_set_bool(chip, true, "realized", &error_fatal);
>      }
>
David Gibson Sept. 9, 2017, 7:02 a.m. UTC | #2
On Wed, Sep 06, 2017 at 01:57:48PM +0530, Nikunj A Dadhania wrote:
> When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
> initialize 4 cpus. Compute the chip per cores depending on the number of chips
> and smt threads.
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

I don't understand why simply treating smp_cores as cores per chip is wrong.

> ---
>  hw/ppc/pnv.c | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 9724719..3fbaafb 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -642,7 +642,7 @@ static void ppc_powernv_init(MachineState *machine)
>      MemoryRegion *ram;
>      char *fw_filename;
>      long fw_size;
> -    int i;
> +    int i, cores_per_chip;
>      char *chip_typename;
>      PCIBus *pbus;
>      bool has_gfx = false;
> @@ -710,6 +710,22 @@ static void ppc_powernv_init(MachineState *machine)
>      }
>  
>      pnv->chips = g_new0(PnvChip *, pnv->num_chips);
> +
> +    /* If user has specified number of cores, use it. Otherwise, compute it. */
> +    if (smp_cores != 1) {
> +        cores_per_chip = smp_cores;
> +    } else {
> +        cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> +    }
> +
> +    if (smp_cpus != (smp_threads * pnv->num_chips * cores_per_chip)) {
> +        error_report("cpu topology not balanced: "
> +                     "chips (%u) * cores (%u) * threads (%u) != "
> +                     "number of cpus (%u)",
> +                     pnv->num_chips, cores_per_chip, smp_threads, smp_cpus);
> +        exit(1);
> +    }
> +
>      for (i = 0; i < pnv->num_chips; i++) {
>          char chip_name[32];
>          Object *chip = object_new(chip_typename);
> @@ -728,7 +744,7 @@ static void ppc_powernv_init(MachineState *machine)
>          object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
>          object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
>                                  &error_fatal);
> -        object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> +        object_property_set_int(chip, cores_per_chip, "nr-cores", &error_fatal);
>          object_property_set_int(chip, 1, "num-phbs", &error_fatal);
>          object_property_set_bool(chip, true, "realized", &error_fatal);
>      }
Nikunj A. Dadhania Sept. 11, 2017, 5:10 a.m. UTC | #3
David Gibson <david@gibson.dropbear.id.au> writes:

> On Wed, Sep 06, 2017 at 01:57:48PM +0530, Nikunj A Dadhania wrote:
>> When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
>> initialize 4 cpus. Compute the chip per cores depending on the number of chips
>> and smt threads.
>> 
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>
> I don't understand why simply treating smp_cores as cores per chip is wrong.

We do not have SMT support and when "-smp 4" is passed, smp_cores is
always set to 1. So only one core with one thread finally shows up in
the guest. Moreover, I see spapr doing a similar thing in
spapr_init_cpus() with boot_cores_nr.

Regards
Nikunj
David Gibson Sept. 13, 2017, 7:35 a.m. UTC | #4
On Mon, Sep 11, 2017 at 10:40:10AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Wed, Sep 06, 2017 at 01:57:48PM +0530, Nikunj A Dadhania wrote:
> >> When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
> >> initialize 4 cpus. Compute the chip per cores depending on the number of chips
> >> and smt threads.
> >> 
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >
> > I don't understand why simply treating smp_cores as cores per chip is wrong.
> 
> We do not have SMT support and when "-smp 4" is passed, smp_cores is
> always set to 1. So only once core with one thread finally show up in
> the guest. Moreover, I see spapr too doing similar thing in
> spapr_init_cpus() with boot_cores_nr.

I'm ok with adding an error if smp_threads > 1, since that won't
work.  Breaking the identity that smp_cores is the number of cores per
socket (which should correspond to one chip) is not ok, though.

I think you're misinterpreting the boot_cores_nr stuff - that's just
about translating the number of initially online vcpus into a number
of initially online cores.
Nikunj A. Dadhania Sept. 14, 2017, 5:12 a.m. UTC | #5
David Gibson <david@gibson.dropbear.id.au> writes:

> On Mon, Sep 11, 2017 at 10:40:10AM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Wed, Sep 06, 2017 at 01:57:48PM +0530, Nikunj A Dadhania wrote:
>> >> When the user does not provide the cpu topology, e.g. "-smp 4", machine fails to
>> >> initialize 4 cpus. Compute the chip per cores depending on the number of chips
>> >> and smt threads.
>> >> 
>> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >
>> > I don't understand why simply treating smp_cores as cores per chip is wrong.
>> 
>> We do not have SMT support and when "-smp 4" is passed, smp_cores is
>> always set to 1. So only once core with one thread finally show up in
>> the guest. Moreover, I see spapr too doing similar thing in
>> spapr_init_cpus() with boot_cores_nr.
>
> I'm ok with adding an error if smp_threads > 1, since that won't
> work.  Breaking the identity that smp_cores is the number of cores per
> socket (which should correspond to one chip) is not ok, though.
>
> I think you're misinterpreting the boot_cores_nr stuff - that's just
> about translating the number of initially online vcpus into a number
> of initially online cores.

I thought I was doing the same here for PowerNV: the number of online
cores is equal to the initial online vcpus / threads per core

   int boot_cores_nr = smp_cpus / smp_threads;

Only difference that I see in PowerNV is that we have multiple chips
(max 2, at the moment)

        cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);

And in case user has provided sane smp_cores, we use it.

Regards,
Nikunj
David Gibson Sept. 19, 2017, 8:24 a.m. UTC | #6
On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> >> 
> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> is equal to initial online vcpus / threads per core
> >> >> 
> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> 
> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> (max 2, at the moment)
> >> >> 
> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >
> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> > smp_cores, you shouldn't need another calculation for it.
> >> >
> >> >> And in case user has provided sane smp_cores, we use it.
> >> >
> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> > it.  That's just asking for confusion.
> >> 
> >> This is the case where the user does not provide a topology(which is a
> >> valid scenario), not sure we should reject it. So qemu defaults
> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >
> > If you can find a way to override it by altering smp_cores when it's
> > not explicitly specified, then ok.
> 
> Should I change the global smp_cores here as well ?

I'm pretty uneasy with that option.  It would take a fair bit of
checking to ensure that changing smp_cores is safe here.  An easier to
verify option would be to make the generic logic which splits up an
unspecified -smp N into cores and sockets more flexible, possibly
based on machine options for max values.

That might still be more trouble than its worth.

> > But overriding smp_cores with a different variable that's the "real"
> > number of cores is not acceptable. If that means the user has to
> > specify cores explicitly, so be it.
> 
> Right, we would error out in case there is mismatch.
> 
> > Slight awkwardness in command line is preferable to breaking the
> > assumption that smp_cores == (# of cores per next level up cpu object)
> > which is used all over the place.
> 
> Regards
> Nikunj
>
Nikunj A. Dadhania Sept. 20, 2017, 4:20 a.m. UTC | #7
David Gibson <david@gibson.dropbear.id.au> writes:

> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> 
>> >> >> 
>> >> >> I thought, I am doing the same here for PowerNV, number of online cores
>> >> >> is equal to initial online vcpus / threads per core
>> >> >> 
>> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
>> >> >> 
>> >> >> Only difference that I see in PowerNV is that we have multiple chips
>> >> >> (max 2, at the moment)
>> >> >> 
>> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>> >> >
>> >> > This doesn't make sense to me.  Cores per chip should *always* equal
>> >> > smp_cores, you shouldn't need another calculation for it.
>> >> >
>> >> >> And in case user has provided sane smp_cores, we use it.
>> >> >
>> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
>> >> > it.  That's just asking for confusion.
>> >> 
>> >> This is the case where the user does not provide a topology(which is a
>> >> valid scenario), not sure we should reject it. So qemu defaults
>> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>> >
>> > If you can find a way to override it by altering smp_cores when it's
>> > not explicitly specified, then ok.
>> 
>> Should I change the global smp_cores here as well ?
>
> I'm pretty uneasy with that option.

Me too.

> It would take a fair bit of checking to ensure that changing smp_cores
> is safe here. An easier to verify option would be to make the generic
> logic which splits up an unspecified -smp N into cores and sockets
> more flexible, possibly based on machine options for max values.
>
> That might still be more trouble than its worth.

I think the current approach is the simplest and least intrusive, as we
are handling a case where the user has not bothered to provide a detailed
topology; the best we can do is create single-threaded cores equal to the
number of cpus.

Regards
Nikunj
David Gibson Sept. 20, 2017, 4:55 a.m. UTC | #8
On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> 
> >> >> >> 
> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> >> is equal to initial online vcpus / threads per core
> >> >> >> 
> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> >> 
> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> >> (max 2, at the moment)
> >> >> >> 
> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >> >
> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> >> > smp_cores, you shouldn't need another calculation for it.
> >> >> >
> >> >> >> And in case user has provided sane smp_cores, we use it.
> >> >> >
> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> >> > it.  That's just asking for confusion.
> >> >> 
> >> >> This is the case where the user does not provide a topology(which is a
> >> >> valid scenario), not sure we should reject it. So qemu defaults
> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >> >
> >> > If you can find a way to override it by altering smp_cores when it's
> >> > not explicitly specified, then ok.
> >> 
> >> Should I change the global smp_cores here as well ?
> >
> > I'm pretty uneasy with that option.
> 
> Me too.
> 
> > It would take a fair bit of checking to ensure that changing smp_cores
> > is safe here. An easier to verify option would be to make the generic
> > logic which splits up an unspecified -smp N into cores and sockets
> > more flexible, possibly based on machine options for max values.
> >
> > That might still be more trouble than its worth.
> 
> I think the current approach is the simplest and less intrusive, as we
> are handling a case where user has not bothered to provide a detailed
> topology, the best we can do is create single threaded cores equal to
> number of cores.

No, sorry.  Having smp_cores not correspond to the number of cores per
chip in all cases is just not ok.  Add an error message if the
topology isn't workable for powernv by all means.  But users having to
use a longer command line is better than breaking basic assumptions
about what numbers reflect what topology.
Nikunj A. Dadhania Sept. 20, 2017, 5:13 a.m. UTC | #9
David Gibson <david@gibson.dropbear.id.au> writes:

> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> 
>> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> 
>> >> >> >> 
>> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
>> >> >> >> is equal to initial online vcpus / threads per core
>> >> >> >> 
>> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
>> >> >> >> 
>> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
>> >> >> >> (max 2, at the moment)
>> >> >> >> 
>> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>> >> >> >
>> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
>> >> >> > smp_cores, you shouldn't need another calculation for it.
>> >> >> >
>> >> >> >> And in case user has provided sane smp_cores, we use it.
>> >> >> >
>> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
>> >> >> > it.  That's just asking for confusion.
>> >> >> 
>> >> >> This is the case where the user does not provide a topology(which is a
>> >> >> valid scenario), not sure we should reject it. So qemu defaults
>> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>> >> >
>> >> > If you can find a way to override it by altering smp_cores when it's
>> >> > not explicitly specified, then ok.
>> >> 
>> >> Should I change the global smp_cores here as well ?
>> >
>> > I'm pretty uneasy with that option.
>> 
>> Me too.
>> 
>> > It would take a fair bit of checking to ensure that changing smp_cores
>> > is safe here. An easier to verify option would be to make the generic
>> > logic which splits up an unspecified -smp N into cores and sockets
>> > more flexible, possibly based on machine options for max values.
>> >
>> > That might still be more trouble than its worth.
>> 
>> I think the current approach is the simplest and less intrusive, as we
>> are handling a case where user has not bothered to provide a detailed
>> topology, the best we can do is create single threaded cores equal to
>> number of cores.
>
> No, sorry.  Having smp_cores not correspond to the number of cores per
> chip in all cases is just not ok.  Add an error message if the
> topology isn't workable for powernv by all means.  But users having to
> use a longer command line is better than breaking basic assumptions
> about what numbers reflect what topology.

Sorry to ask again, as I am still not convinced: don't we do a similar
adjustment in spapr when the user does not provide the number of cores,
where qemu assumes them to be single-threaded cores and creates
cores (boot_cores_nr) that are not the same as smp_cores?

Regards
Nikunj
David Gibson Sept. 20, 2017, 6:17 a.m. UTC | #10
On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> 
> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> 
> >> >> >> >> 
> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> >> >> is equal to initial online vcpus / threads per core
> >> >> >> >> 
> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> >> >> 
> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> >> >> (max 2, at the moment)
> >> >> >> >> 
> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >> >> >
> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> >> >> > smp_cores, you shouldn't need another calculation for it.
> >> >> >> >
> >> >> >> >> And in case user has provided sane smp_cores, we use it.
> >> >> >> >
> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> >> >> > it.  That's just asking for confusion.
> >> >> >> 
> >> >> >> This is the case where the user does not provide a topology(which is a
> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >> >> >
> >> >> > If you can find a way to override it by altering smp_cores when it's
> >> >> > not explicitly specified, then ok.
> >> >> 
> >> >> Should I change the global smp_cores here as well ?
> >> >
> >> > I'm pretty uneasy with that option.
> >> 
> >> Me too.
> >> 
> >> > It would take a fair bit of checking to ensure that changing smp_cores
> >> > is safe here. An easier to verify option would be to make the generic
> >> > logic which splits up an unspecified -smp N into cores and sockets
> >> > more flexible, possibly based on machine options for max values.
> >> >
> >> > That might still be more trouble than its worth.
> >> 
> >> I think the current approach is the simplest and less intrusive, as we
> >> are handling a case where user has not bothered to provide a detailed
> >> topology, the best we can do is create single threaded cores equal to
> >> number of cores.
> >
> > No, sorry.  Having smp_cores not correspond to the number of cores per
> > chip in all cases is just not ok.  Add an error message if the
> > topology isn't workable for powernv by all means.  But users having to
> > use a longer command line is better than breaking basic assumptions
> > about what numbers reflect what topology.
> 
> Sorry to ask again, as I am still not convinced, we do similar
> adjustment in spapr where the user did not provide the number of cores,
> but qemu assumes them as single threaded cores and created
> cores(boot_cores_nr) that were not same as smp_cores ?

What?  boot_cores_nr has absolutely nothing to do with adjusting the
topology, and it certainly doesn't assume they're single threaded.

boot_cores_nr is simply the number of cores (each of smp_threads
threads) which are online initially.  In an sPAPR system there are
max_cpus total potential threads.  Those are divided into cores each
with smp_threads threads (so max_cpus / smp_threads total cores), and
those cores are divided into sockets each with smp_cores sockets (so
max_cpus / smp_threads / smp_cores total sockets).  Of all those
potential threads smp_cpus are initially online (the rest can be
hotplugged later), so there are smp_cpus / smp_threads cores initially
online.

We need that calculation because we can only hotplug cpus on spapr at
core granularity, not thread granularity (x86 can do that).  If
smp_cpus is not a multiple of smp_threads we give an error (except for
old machine types, where we have some hacks for backwards compat).
Nikunj A. Dadhania Sept. 20, 2017, 6:40 a.m. UTC | #11
David Gibson <david@gibson.dropbear.id.au> writes:

> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> 
>> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> 
>> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> 
>> >> >> >> >> 
>> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
>> >> >> >> >> is equal to initial online vcpus / threads per core
>> >> >> >> >> 
>> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
>> >> >> >> >> 
>> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
>> >> >> >> >> (max 2, at the moment)
>> >> >> >> >> 
>> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>> >> >> >> >
>> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
>> >> >> >> > smp_cores, you shouldn't need another calculation for it.
>> >> >> >> >
>> >> >> >> >> And in case user has provided sane smp_cores, we use it.
>> >> >> >> >
>> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
>> >> >> >> > it.  That's just asking for confusion.
>> >> >> >> 
>> >> >> >> This is the case where the user does not provide a topology(which is a
>> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
>> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>> >> >> >
>> >> >> > If you can find a way to override it by altering smp_cores when it's
>> >> >> > not explicitly specified, then ok.
>> >> >> 
>> >> >> Should I change the global smp_cores here as well ?
>> >> >
>> >> > I'm pretty uneasy with that option.
>> >> 
>> >> Me too.
>> >> 
>> >> > It would take a fair bit of checking to ensure that changing smp_cores
>> >> > is safe here. An easier to verify option would be to make the generic
>> >> > logic which splits up an unspecified -smp N into cores and sockets
>> >> > more flexible, possibly based on machine options for max values.
>> >> >
>> >> > That might still be more trouble than its worth.
>> >> 
>> >> I think the current approach is the simplest and less intrusive, as we
>> >> are handling a case where user has not bothered to provide a detailed
>> >> topology, the best we can do is create single threaded cores equal to
>> >> number of cores.
>> >
>> > No, sorry.  Having smp_cores not correspond to the number of cores per
>> > chip in all cases is just not ok.  Add an error message if the
>> > topology isn't workable for powernv by all means.  But users having to
>> > use a longer command line is better than breaking basic assumptions
>> > about what numbers reflect what topology.
>> 
>> Sorry to ask again, as I am still not convinced, we do similar
>> adjustment in spapr where the user did not provide the number of cores,
>> but qemu assumes them as single threaded cores and created
>> cores(boot_cores_nr) that were not same as smp_cores ?
>
> What?  boot_cores_nr has absolutely nothing to do with adjusting the
> topology, and it certainly doesn't assume they're single threaded.

When we start a TCG guest and user provides following commandline, e.g.
"-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
with 4 cores, each having 1 thread.

Regards
Nikunj
Nikunj A. Dadhania Sept. 20, 2017, 6:50 a.m. UTC | #12
Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:

>> >> 
>>> >> I think the current approach is the simplest and less intrusive, as we
>>> >> are handling a case where user has not bothered to provide a detailed
>>> >> topology, the best we can do is create single threaded cores equal to
>>> >> number of cores.
>>> >
>>> > No, sorry.  Having smp_cores not correspond to the number of cores per
>>> > chip in all cases is just not ok.  Add an error message if the
>>> > topology isn't workable for powernv by all means.  But users having to
>>> > use a longer command line is better than breaking basic assumptions
>>> > about what numbers reflect what topology.
>>> 
>>> Sorry to ask again, as I am still not convinced, we do similar
>>> adjustment in spapr where the user did not provide the number of cores,
>>> but qemu assumes them as single threaded cores and created
>>> cores(boot_cores_nr) that were not same as smp_cores ?
>>
>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
>> topology, and it certainly doesn't assume they're single threaded.
>
> When we start a TCG guest and user provides following commandline,
> e.g. "-smp 4", smt_threads is set to 1 by default in vl.c.

I meant smp_threads here.

> So the guest boots with 4 cores, each having 1 thread.
>
> Regards
> Nikunj
David Gibson Sept. 20, 2017, 6:57 a.m. UTC | #13
On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> 
> >> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> 
> >> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> 
> >> >> >> >> >> 
> >> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> >> >> >> is equal to initial online vcpus / threads per core
> >> >> >> >> >> 
> >> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> >> >> >> 
> >> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> >> >> >> (max 2, at the moment)
> >> >> >> >> >> 
> >> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >> >> >> >
> >> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> >> >> >> > smp_cores, you shouldn't need another calculation for it.
> >> >> >> >> >
> >> >> >> >> >> And in case user has provided sane smp_cores, we use it.
> >> >> >> >> >
> >> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> >> >> >> > it.  That's just asking for confusion.
> >> >> >> >> 
> >> >> >> >> This is the case where the user does not provide a topology(which is a
> >> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
> >> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >> >> >> >
> >> >> >> > If you can find a way to override it by altering smp_cores when it's
> >> >> >> > not explicitly specified, then ok.
> >> >> >> 
> >> >> >> Should I change the global smp_cores here as well ?
> >> >> >
> >> >> > I'm pretty uneasy with that option.
> >> >> 
> >> >> Me too.
> >> >> 
> >> >> > It would take a fair bit of checking to ensure that changing smp_cores
> >> >> > is safe here. An easier to verify option would be to make the generic
> >> >> > logic which splits up an unspecified -smp N into cores and sockets
> >> >> > more flexible, possibly based on machine options for max values.
> >> >> >
> >> >> > That might still be more trouble than its worth.
> >> >> 
> >> >> I think the current approach is the simplest and less intrusive, as we
> >> >> are handling a case where user has not bothered to provide a detailed
> >> >> topology, the best we can do is create single threaded cores equal to
> >> >> number of cores.
> >> >
> >> > No, sorry.  Having smp_cores not correspond to the number of cores per
> >> > chip in all cases is just not ok.  Add an error message if the
> >> > topology isn't workable for powernv by all means.  But users having to
> >> > use a longer command line is better than breaking basic assumptions
> >> > about what numbers reflect what topology.
> >> 
> >> Sorry to ask again, as I am still not convinced, we do similar
> >> adjustment in spapr where the user did not provide the number of cores,
> >> but qemu assumes them as single threaded cores and created
> >> cores(boot_cores_nr) that were not same as smp_cores ?
> >
> > What?  boot_cores_nr has absolutely nothing to do with adjusting the
> > topology, and it certainly doesn't assume they're single threaded.
> 
> When we start a TCG guest and user provides following commandline, e.g.
> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> with 4 cores, each having 1 thread.

Ok.. and what's the problem with that behaviour on powernv?
Nikunj A. Dadhania Sept. 20, 2017, 7:18 a.m. UTC | #14
David Gibson <david@gibson.dropbear.id.au> writes:

> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> 
>> >> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> 
>> >> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> 
>> >> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> >> 
>> >> >> >> >> >> 
>> >> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
>> >> >> >> >> >> is equal to initial online vcpus / threads per core
>> >> >> >> >> >> 
>> >> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
>> >> >> >> >> >> 
>> >> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
>> >> >> >> >> >> (max 2, at the moment)
>> >> >> >> >> >> 
>> >> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>> >> >> >> >> >
>> >> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
>> >> >> >> >> > smp_cores, you shouldn't need another calculation for it.
>> >> >> >> >> >
>> >> >> >> >> >> And in case user has provided sane smp_cores, we use it.
>> >> >> >> >> >
>> >> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
>> >> >> >> >> > it.  That's just asking for confusion.
>> >> >> >> >> 
>> >> >> >> >> This is the case where the user does not provide a topology(which is a
>> >> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
>> >> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>> >> >> >> >
>> >> >> >> > If you can find a way to override it by altering smp_cores when it's
>> >> >> >> > not explicitly specified, then ok.
>> >> >> >> 
>> >> >> >> Should I change the global smp_cores here as well ?
>> >> >> >
>> >> >> > I'm pretty uneasy with that option.
>> >> >> 
>> >> >> Me too.
>> >> >> 
>> >> >> > It would take a fair bit of checking to ensure that changing smp_cores
>> >> >> > is safe here. An easier to verify option would be to make the generic
>> >> >> > logic which splits up an unspecified -smp N into cores and sockets
>> >> >> > more flexible, possibly based on machine options for max values.
>> >> >> >
>> >> >> > That might still be more trouble than its worth.
>> >> >> 
>> >> >> I think the current approach is the simplest and less intrusive, as we
>> >> >> are handling a case where user has not bothered to provide a detailed
>> >> >> topology, the best we can do is create single threaded cores equal to
>> >> >> number of cores.
>> >> >
>> >> > No, sorry.  Having smp_cores not correspond to the number of cores per
>> >> > chip in all cases is just not ok.  Add an error message if the
>> >> > topology isn't workable for powernv by all means.  But users having to
>> >> > use a longer command line is better than breaking basic assumptions
>> >> > about what numbers reflect what topology.
>> >> 
>> >> Sorry to ask again, as I am still not convinced, we do similar
>> >> adjustment in spapr where the user did not provide the number of cores,
>> >> but qemu assumes them as single threaded cores and created
>> >> cores(boot_cores_nr) that were not same as smp_cores ?
>> >
>> > What?  boot_cores_nr has absolutely nothing to do with adjusting the
>> > topology, and it certainly doesn't assume they're single threaded.
>> 
>> When we start a TCG guest and user provides following commandline, e.g.
>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
>> with 4 cores, each having 1 thread.
>
> Ok.. and what's the problem with that behaviour on powernv?

As smp_threads defaults to 1 in vl.c, smp_cores likewise has a default
value of 1 in vl.c. In powernv, we were setting nr-cores like
this:

        object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);

Even when there were multiple cpus (-smp 4), when the guest boots up, we
just get one core (i.e. smp_cores was 1) with single thread(smp_threads
was 1), which is wrong as per the command-line that was provided.

Regards
Nikunj
Cédric Le Goater Sept. 20, 2017, 8:12 a.m. UTC | #15
On 09/20/2017 09:18 AM, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>
>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>
>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>
>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>
>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
>>>>>>>>>>>>>
>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
>>>>>>>>>>>>>
>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
>>>>>>>>>>>>> (max 2, at the moment)
>>>>>>>>>>>>>
>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>>>>>>>>>>>>
>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
>>>>>>>>>>>>
>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.
>>>>>>>>>>>>
>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
>>>>>>>>>>>> it.  That's just asking for confusion.
>>>>>>>>>>>
>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>>>>>>>>>>
>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
>>>>>>>>>> not explicitly specified, then ok.
>>>>>>>>>
>>>>>>>>> Should I change the global smp_cores here as well ?
>>>>>>>>
>>>>>>>> I'm pretty uneasy with that option.
>>>>>>>
>>>>>>> Me too.
>>>>>>>
>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
>>>>>>>> is safe here. An easier to verify option would be to make the generic
>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
>>>>>>>> more flexible, possibly based on machine options for max values.
>>>>>>>>
>>>>>>>> That might still be more trouble than its worth.
>>>>>>>
>>>>>>> I think the current approach is the simplest and less intrusive, as we
>>>>>>> are handling a case where user has not bothered to provide a detailed
>>>>>>> topology, the best we can do is create single threaded cores equal to
>>>>>>> number of cores.
>>>>>>
>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
>>>>>> chip in all cases is just not ok.  Add an error message if the
>>>>>> topology isn't workable for powernv by all means.  But users having to
>>>>>> use a longer command line is better than breaking basic assumptions
>>>>>> about what numbers reflect what topology.
>>>>>
>>>>> Sorry to ask again, as I am still not convinced, we do similar
>>>>> adjustment in spapr where the user did not provide the number of cores,
>>>>> but qemu assumes them as single threaded cores and created
>>>>> cores(boot_cores_nr) that were not same as smp_cores ?
>>>>
>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
>>>> topology, and it certainly doesn't assume they're single threaded.
>>>
>>> When we start a TCG guest and user provides following commandline, e.g.
>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
>>> with 4 cores, each having 1 thread.
>>
>> Ok.. and what's the problem with that behaviour on powernv?
> 
> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> this:
> 
>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> 
> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> was 1), which is wrong as per the command-line that was provided.

I have never noticed this, as I always use the cores= option, but if
you use the following on a powernv machine:

 -smp 4				1 cpu
 -smp cpus=4,threads=1		1 cpu
 -smp cores=4,threads=1		4 cpus
 -smp cpus=4,cores=4,threads=1	4 cpus
 -smp cpus=1,cores=4,threads=1	fails
 -smp cpus=4,cores=1,threads=1	1 cpu

Should we be using 'smp_cpus' instead of 'smp_cores' then ? Honestly,
I feel a bit lost with all default behaviors.

C.
David Gibson Sept. 20, 2017, 11:53 a.m. UTC | #16
On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> 
> >> >> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> 
> >> >> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> 
> >> >> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> >> 
> >> >> >> >> >> >> 
> >> >> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> >> >> >> >> is equal to initial online vcpus / threads per core
> >> >> >> >> >> >> 
> >> >> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> >> >> >> >> 
> >> >> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> >> >> >> >> (max 2, at the moment)
> >> >> >> >> >> >> 
> >> >> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >> >> >> >> >
> >> >> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> >> >> >> >> > smp_cores, you shouldn't need another calculation for it.
> >> >> >> >> >> >
> >> >> >> >> >> >> And in case user has provided sane smp_cores, we use it.
> >> >> >> >> >> >
> >> >> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> >> >> >> >> > it.  That's just asking for confusion.
> >> >> >> >> >> 
> >> >> >> >> >> This is the case where the user does not provide a topology(which is a
> >> >> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
> >> >> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >> >> >> >> >
> >> >> >> >> > If you can find a way to override it by altering smp_cores when it's
> >> >> >> >> > not explicitly specified, then ok.
> >> >> >> >> 
> >> >> >> >> Should I change the global smp_cores here as well ?
> >> >> >> >
> >> >> >> > I'm pretty uneasy with that option.
> >> >> >> 
> >> >> >> Me too.
> >> >> >> 
> >> >> >> > It would take a fair bit of checking to ensure that changing smp_cores
> >> >> >> > is safe here. An easier to verify option would be to make the generic
> >> >> >> > logic which splits up an unspecified -smp N into cores and sockets
> >> >> >> > more flexible, possibly based on machine options for max values.
> >> >> >> >
> >> >> >> > That might still be more trouble than its worth.
> >> >> >> 
> >> >> >> I think the current approach is the simplest and less intrusive, as we
> >> >> >> are handling a case where user has not bothered to provide a detailed
> >> >> >> topology, the best we can do is create single threaded cores equal to
> >> >> >> number of cores.
> >> >> >
> >> >> > No, sorry.  Having smp_cores not correspond to the number of cores per
> >> >> > chip in all cases is just not ok.  Add an error message if the
> >> >> > topology isn't workable for powernv by all means.  But users having to
> >> >> > use a longer command line is better than breaking basic assumptions
> >> >> > about what numbers reflect what topology.
> >> >> 
> >> >> Sorry to ask again, as I am still not convinced, we do similar
> >> >> adjustment in spapr where the user did not provide the number of cores,
> >> >> but qemu assumes them as single threaded cores and created
> >> >> cores(boot_cores_nr) that were not same as smp_cores ?
> >> >
> >> > What?  boot_cores_nr has absolutely nothing to do with adjusting the
> >> > topology, and it certainly doesn't assume they're single threaded.
> >> 
> >> When we start a TCG guest and user provides following commandline, e.g.
> >> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> >> with 4 cores, each having 1 thread.
> >
> > Ok.. and what's the problem with that behaviour on powernv?
> 
> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> this:
> 
>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> 
> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> was 1), which is wrong as per the command-line that was provided.

Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
thread.  If you can't supply 4 sockets you should error, but you
shouldn't go and change the number of cores per socket.
Nikunj A. Dadhania Sept. 21, 2017, 3:54 a.m. UTC | #17
David Gibson <david@gibson.dropbear.id.au> writes:

> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
>> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> 
>> >> > On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> 
>> >> >> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> 
>> >> >> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> >> 
>> >> >> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>> >> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
>> >> >> >> >> >> 
>> >> >> >> >> >> >> 
>> >> >> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
>> >> >> >> >> >> >> is equal to initial online vcpus / threads per core
>> >> >> >> >> >> >> 
>> >> >> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
>> >> >> >> >> >> >> 
>> >> >> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
>> >> >> >> >> >> >> (max 2, at the moment)
>> >> >> >> >> >> >> 
>> >> >> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>> >> >> >> >> >> >
>> >> >> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
>> >> >> >> >> >> > smp_cores, you shouldn't need another calculation for it.
>> >> >> >> >> >> >
>> >> >> >> >> >> >> And in case user has provided sane smp_cores, we use it.
>> >> >> >> >> >> >
>> >> >> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
>> >> >> >> >> >> > it.  That's just asking for confusion.
>> >> >> >> >> >> 
>> >> >> >> >> >> This is the case where the user does not provide a topology(which is a
>> >> >> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
>> >> >> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>> >> >> >> >> >
>> >> >> >> >> > If you can find a way to override it by altering smp_cores when it's
>> >> >> >> >> > not explicitly specified, then ok.
>> >> >> >> >> 
>> >> >> >> >> Should I change the global smp_cores here as well ?
>> >> >> >> >
>> >> >> >> > I'm pretty uneasy with that option.
>> >> >> >> 
>> >> >> >> Me too.
>> >> >> >> 
>> >> >> >> > It would take a fair bit of checking to ensure that changing smp_cores
>> >> >> >> > is safe here. An easier to verify option would be to make the generic
>> >> >> >> > logic which splits up an unspecified -smp N into cores and sockets
>> >> >> >> > more flexible, possibly based on machine options for max values.
>> >> >> >> >
>> >> >> >> > That might still be more trouble than its worth.
>> >> >> >> 
>> >> >> >> I think the current approach is the simplest and less intrusive, as we
>> >> >> >> are handling a case where user has not bothered to provide a detailed
>> >> >> >> topology, the best we can do is create single threaded cores equal to
>> >> >> >> number of cores.
>> >> >> >
>> >> >> > No, sorry.  Having smp_cores not correspond to the number of cores per
>> >> >> > chip in all cases is just not ok.  Add an error message if the
>> >> >> > topology isn't workable for powernv by all means.  But users having to
>> >> >> > use a longer command line is better than breaking basic assumptions
>> >> >> > about what numbers reflect what topology.
>> >> >> 
>> >> >> Sorry to ask again, as I am still not convinced, we do similar
>> >> >> adjustment in spapr where the user did not provide the number of cores,
>> >> >> but qemu assumes them as single threaded cores and created
>> >> >> cores(boot_cores_nr) that were not same as smp_cores ?
>> >> >
>> >> > What?  boot_cores_nr has absolutely nothing to do with adjusting the
>> >> > topology, and it certainly doesn't assume they're single threaded.
>> >> 
>> >> When we start a TCG guest and user provides following commandline, e.g.
>> >> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
>> >> with 4 cores, each having 1 thread.
>> >
>> > Ok.. and what's the problem with that behaviour on powernv?
>> 
>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>> this:
>> 
>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>> 
>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>> was 1), which is wrong as per the command-line that was provided.
>
> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> thread.  If you can't supply 4 sockets you should error, but you
> shouldn't go and change the number of cores per socket.

OK, that makes sense now. And I do see that smp_cpus is 4 in the above
case. Now, looking more into it, I see that powernv has something called
"num_chips"; isn't this the same as sockets? Do we need num_chips separately?

Regards,
Nikunj
David Gibson Sept. 21, 2017, 5:31 a.m. UTC | #18
On Thu, Sep 21, 2017 at 09:24:46AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> 
> >> > On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> 
> >> >> > On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> 
> >> >> >> > On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> 
> >> >> >> >> > On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> >> 
> >> >> >> >> >> > On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >> >> >> >> >> >> David Gibson <david@gibson.dropbear.id.au> writes:
> >> >> >> >> >> >> 
> >> >> >> >> >> >> >> 
> >> >> >> >> >> >> >> I thought, I am doing the same here for PowerNV, number of online cores
> >> >> >> >> >> >> >> is equal to initial online vcpus / threads per core
> >> >> >> >> >> >> >> 
> >> >> >> >> >> >> >>    int boot_cores_nr = smp_cpus / smp_threads;
> >> >> >> >> >> >> >> 
> >> >> >> >> >> >> >> Only difference that I see in PowerNV is that we have multiple chips
> >> >> >> >> >> >> >> (max 2, at the moment)
> >> >> >> >> >> >> >> 
> >> >> >> >> >> >> >>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >> >> >> >> >> >> >
> >> >> >> >> >> >> > This doesn't make sense to me.  Cores per chip should *always* equal
> >> >> >> >> >> >> > smp_cores, you shouldn't need another calculation for it.
> >> >> >> >> >> >> >
> >> >> >> >> >> >> >> And in case user has provided sane smp_cores, we use it.
> >> >> >> >> >> >> >
> >> >> >> >> >> >> > If smp_cores isn't sane, you should simply reject it, not try to fix
> >> >> >> >> >> >> > it.  That's just asking for confusion.
> >> >> >> >> >> >> 
> >> >> >> >> >> >> This is the case where the user does not provide a topology(which is a
> >> >> >> >> >> >> valid scenario), not sure we should reject it. So qemu defaults
> >> >> >> >> >> >> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >> >> >> >> >> >
> >> >> >> >> >> > If you can find a way to override it by altering smp_cores when it's
> >> >> >> >> >> > not explicitly specified, then ok.
> >> >> >> >> >> 
> >> >> >> >> >> Should I change the global smp_cores here as well ?
> >> >> >> >> >
> >> >> >> >> > I'm pretty uneasy with that option.
> >> >> >> >> 
> >> >> >> >> Me too.
> >> >> >> >> 
> >> >> >> >> > It would take a fair bit of checking to ensure that changing smp_cores
> >> >> >> >> > is safe here. An easier to verify option would be to make the generic
> >> >> >> >> > logic which splits up an unspecified -smp N into cores and sockets
> >> >> >> >> > more flexible, possibly based on machine options for max values.
> >> >> >> >> >
> >> >> >> >> > That might still be more trouble than its worth.
> >> >> >> >> 
> >> >> >> >> I think the current approach is the simplest and less intrusive, as we
> >> >> >> >> are handling a case where user has not bothered to provide a detailed
> >> >> >> >> topology, the best we can do is create single threaded cores equal to
> >> >> >> >> number of cores.
> >> >> >> >
> >> >> >> > No, sorry.  Having smp_cores not correspond to the number of cores per
> >> >> >> > chip in all cases is just not ok.  Add an error message if the
> >> >> >> > topology isn't workable for powernv by all means.  But users having to
> >> >> >> > use a longer command line is better than breaking basic assumptions
> >> >> >> > about what numbers reflect what topology.
> >> >> >> 
> >> >> >> Sorry to ask again, as I am still not convinced, we do similar
> >> >> >> adjustment in spapr where the user did not provide the number of cores,
> >> >> >> but qemu assumes them as single threaded cores and created
> >> >> >> cores(boot_cores_nr) that were not same as smp_cores ?
> >> >> >
> >> >> > What?  boot_cores_nr has absolutely nothing to do with adjusting the
> >> >> > topology, and it certainly doesn't assume they're single threaded.
> >> >> 
> >> >> When we start a TCG guest and user provides following commandline, e.g.
> >> >> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> >> >> with 4 cores, each having 1 thread.
> >> >
> >> > Ok.. and what's the problem with that behaviour on powernv?
> >> 
> >> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >> this:
> >> 
> >>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >> 
> >> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >> was 1), which is wrong as per the command-line that was provided.
> >
> > Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> > thread.  If you can't supply 4 sockets you should error, but you
> > shouldn't go and change the number of cores per socket.
> 
> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> case. Now looking more into it, i see that powernv has something called
> "num_chips", isnt this same as sockets ? Do we need num_chips separately?

Ah, yes, I see.  It's probably still reasonable to keep num_chips as
an internal variable, rather than using (smp_cpus / smp_cores /
smp_threads) everywhere.  But we shouldn't have it as a direct
user-settable property, instead setting it from the -smp command line
option.
Cédric Le Goater Sept. 21, 2017, 6:04 a.m. UTC | #19
On 09/21/2017 05:54 AM, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
>> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>
>>>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>
>>>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>
>>>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>
>>>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>
>>>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
>>>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
>>>>>>>>>>>>>>> (max 2, at the moment)
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
>>>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
>>>>>>>>>>>>>> it.  That's just asking for confusion.
>>>>>>>>>>>>>
>>>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
>>>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
>>>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>>>>>>>>>>>>
>>>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
>>>>>>>>>>>> not explicitly specified, then ok.
>>>>>>>>>>>
>>>>>>>>>>> Should I change the global smp_cores here as well ?
>>>>>>>>>>
>>>>>>>>>> I'm pretty uneasy with that option.
>>>>>>>>>
>>>>>>>>> Me too.
>>>>>>>>>
>>>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
>>>>>>>>>> is safe here. An easier to verify option would be to make the generic
>>>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
>>>>>>>>>> more flexible, possibly based on machine options for max values.
>>>>>>>>>>
>>>>>>>>>> That might still be more trouble than its worth.
>>>>>>>>>
>>>>>>>>> I think the current approach is the simplest and less intrusive, as we
>>>>>>>>> are handling a case where user has not bothered to provide a detailed
>>>>>>>>> topology, the best we can do is create single threaded cores equal to
>>>>>>>>> number of cores.
>>>>>>>>
>>>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
>>>>>>>> chip in all cases is just not ok.  Add an error message if the
>>>>>>>> topology isn't workable for powernv by all means.  But users having to
>>>>>>>> use a longer command line is better than breaking basic assumptions
>>>>>>>> about what numbers reflect what topology.
>>>>>>>
>>>>>>> Sorry to ask again, as I am still not convinced, we do similar
>>>>>>> adjustment in spapr where the user did not provide the number of cores,
>>>>>>> but qemu assumes them as single threaded cores and created
>>>>>>> cores(boot_cores_nr) that were not same as smp_cores ?
>>>>>>
>>>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
>>>>>> topology, and it certainly doesn't assume they're single threaded.
>>>>>
>>>>> When we start a TCG guest and user provides following commandline, e.g.
>>>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
>>>>> with 4 cores, each having 1 thread.
>>>>
>>>> Ok.. and what's the problem with that behaviour on powernv?
>>>
>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>> this:
>>>
>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>
>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>> was 1), which is wrong as per the command-line that was provided.
>>
>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>> thread.  If you can't supply 4 sockets you should error, but you
>> shouldn't go and change the number of cores per socket.
> 
> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> case. Now looking more into it, i see that powernv has something called
> "num_chips", isnt this same as sockets ? Do we need num_chips separately?

Yes, that would do for cpus, but how do we retrieve the number of
sockets? I don't see an smp_sockets.

If we start looking at such issues, we should also take into account
memory distribution:

      -numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]

would allow us to define a set of cpus per node (cpus should be evenly
distributed across the nodes, though) and also define memory per node,
although some nodes could be left without memory.

C.
Igor Mammedov Sept. 21, 2017, 7:42 a.m. UTC | #20
On Thu, 21 Sep 2017 08:04:55 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> On 09/21/2017 05:54 AM, Nikunj A Dadhania wrote:
> > David Gibson <david@gibson.dropbear.id.au> writes:
> >   
> >> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:  
> >>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>  
> >>>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:  
> >>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>  
> >>>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:  
> >>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>  
> >>>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:  
> >>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>  
> >>>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:  
> >>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>>>  
> >>>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:  
> >>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>>>>>  
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
> >>>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
> >>>>>>>>>>>>>>> (max 2, at the moment)
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);  
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
> >>>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
> >>>>>>>>>>>>>>  
> >>>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.  
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
> >>>>>>>>>>>>>> it.  That's just asking for confusion.  
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
> >>>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
> >>>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.  
> >>>>>>>>>>>>
> >>>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
> >>>>>>>>>>>> not explicitly specified, then ok.  
> >>>>>>>>>>>
> >>>>>>>>>>> Should I change the global smp_cores here as well ?  
> >>>>>>>>>>
> >>>>>>>>>> I'm pretty uneasy with that option.  
> >>>>>>>>>
> >>>>>>>>> Me too.
> >>>>>>>>>  
> >>>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
> >>>>>>>>>> is safe here. An easier to verify option would be to make the generic
> >>>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
> >>>>>>>>>> more flexible, possibly based on machine options for max values.
> >>>>>>>>>>
> >>>>>>>>>> That might still be more trouble than its worth.  
> >>>>>>>>>
> >>>>>>>>> I think the current approach is the simplest and less intrusive, as we
> >>>>>>>>> are handling a case where user has not bothered to provide a detailed
> >>>>>>>>> topology, the best we can do is create single threaded cores equal to
> >>>>>>>>> number of cores.  
> >>>>>>>>
> >>>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
> >>>>>>>> chip in all cases is just not ok.  Add an error message if the
> >>>>>>>> topology isn't workable for powernv by all means.  But users having to
> >>>>>>>> use a longer command line is better than breaking basic assumptions
> >>>>>>>> about what numbers reflect what topology.  
> >>>>>>>
> >>>>>>> Sorry to ask again, as I am still not convinced, we do similar
> >>>>>>> adjustment in spapr where the user did not provide the number of cores,
> >>>>>>> but qemu assumes them as single threaded cores and created
> >>>>>>> cores(boot_cores_nr) that were not same as smp_cores ?  
> >>>>>>
> >>>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
> >>>>>> topology, and it certainly doesn't assume they're single threaded.  
> >>>>>
> >>>>> When we start a TCG guest and user provides following commandline, e.g.
> >>>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> >>>>> with 4 cores, each having 1 thread.  
> >>>>
> >>>> Ok.. and what's the problem with that behaviour on powernv?  
> >>>
> >>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >>> this:
> >>>
> >>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >>>
> >>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >>> was 1), which is wrong as per the command-line that was provided.  
> >>
> >> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> >> thread.  If you can't supply 4 sockets you should error, but you
> >> shouldn't go and change the number of cores per socket.  
> > 
> > OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> > case. Now looking more into it, i see that powernv has something called
> > "num_chips", isnt this same as sockets ? Do we need num_chips separately?  
> 
> yes that would do for cpus, but how do we retrieve the number of 
> sockets ? I don't see a smp_sockets. 
I'd suggest to rewrite QEMU again :)

more exactly, -smp parsing is global and sometimes doesn't suit the
target device model/machine.
The idea was to make its options machine properties to get rid of globals
and then let the leaf machine redefine parsing behaviour.
here is Drew's take on it:

[Qemu-devel] [PATCH RFC 00/16] Rework SMP parameters
https://www.mail-archive.com/qemu-devel@nongnu.org/msg376961.html

considering there wasn't a pressing need, the series has been pushed
to the end of the TODO list. Maybe you can revive it and make it work for
pnv and other machines.


> If we start looking at such issues, we should also take into account 
> memory distribution :
> 
>       -numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]
its interface is based on cpu_index, which is an internal QEMU number for
a single-CPU execution context.

It would be better if one would use new interface with new machines
 -numa cpu,node-id=0,socket-id=0 ...

> 
> would allow us to define a set of cpus per node, cpus should be evenly 
> distributed on the nodes though, and also define memory per node, but 
> some nodes could be without memory.   
> 
> C. 
>
David Gibson Sept. 22, 2017, 10:08 a.m. UTC | #21
On Thu, Sep 21, 2017 at 08:04:55AM +0200, Cédric Le Goater wrote:
> On 09/21/2017 05:54 AM, Nikunj A Dadhania wrote:
> > David Gibson <david@gibson.dropbear.id.au> writes:
> > 
> >> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
> >>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>
> >>>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
> >>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>
> >>>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
> >>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>
> >>>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
> >>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>
> >>>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
> >>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>>>
> >>>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
> >>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
> >>>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
> >>>>>>>>>>>>>>> (max 2, at the moment)
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
> >>>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
> >>>>>>>>>>>>>> it.  That's just asking for confusion.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
> >>>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
> >>>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
> >>>>>>>>>>>>
> >>>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
> >>>>>>>>>>>> not explicitly specified, then ok.
> >>>>>>>>>>>
> >>>>>>>>>>> Should I change the global smp_cores here as well ?
> >>>>>>>>>>
> >>>>>>>>>> I'm pretty uneasy with that option.
> >>>>>>>>>
> >>>>>>>>> Me too.
> >>>>>>>>>
> >>>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
> >>>>>>>>>> is safe here. An easier to verify option would be to make the generic
> >>>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
> >>>>>>>>>> more flexible, possibly based on machine options for max values.
> >>>>>>>>>>
> >>>>>>>>>> That might still be more trouble than its worth.
> >>>>>>>>>
> >>>>>>>>> I think the current approach is the simplest and less intrusive, as we
> >>>>>>>>> are handling a case where user has not bothered to provide a detailed
> >>>>>>>>> topology, the best we can do is create single threaded cores equal to
> >>>>>>>>> number of cores.
> >>>>>>>>
> >>>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
> >>>>>>>> chip in all cases is just not ok.  Add an error message if the
> >>>>>>>> topology isn't workable for powernv by all means.  But users having to
> >>>>>>>> use a longer command line is better than breaking basic assumptions
> >>>>>>>> about what numbers reflect what topology.
> >>>>>>>
> >>>>>>> Sorry to ask again, as I am still not convinced, we do similar
> >>>>>>> adjustment in spapr where the user did not provide the number of cores,
> >>>>>>> but qemu assumes them as single threaded cores and created
> >>>>>>> cores(boot_cores_nr) that were not same as smp_cores ?
> >>>>>>
> >>>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
> >>>>>> topology, and it certainly doesn't assume they're single threaded.
> >>>>>
> >>>>> When we start a TCG guest and user provides following commandline, e.g.
> >>>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> >>>>> with 4 cores, each having 1 thread.
> >>>>
> >>>> Ok.. and what's the problem with that behaviour on powernv?
> >>>
> >>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> >>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> >>> this:
> >>>
> >>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> >>>
> >>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> >>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> >>> was 1), which is wrong as per the command-line that was provided.
> >>
> >> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> >> thread.  If you can't supply 4 sockets you should error, but you
> >> shouldn't go and change the number of cores per socket.
> > 
> > OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> > case. Now looking more into it, i see that powernv has something called
> > "num_chips", isnt this same as sockets ? Do we need num_chips separately?
> 
> yes that would do for cpus, but how do we retrieve the number of 
> sockets ? I don't see a smp_sockets. 

	# sockets = smp_cpus / smp_threads / smp_cores

Or, if you want the maximum possible number of sockets (for a fully
populated system)
	# sockets = max_cpus / smp_threads / smp_cores
	

> If we start looking at such issues, we should also take into account 
> memory distribution :
> 
>       -numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]
> 
> would allow us to define a set of cpus per node, cpus should be evenly 
> distributed on the nodes though, and also define memory per node, but 
> some nodes could be without memory.

I don't really see what that has to do with anything.  We already have
ways to assign memory or cpus to specific nodes if we want.
David Gibson Sept. 22, 2017, 10:09 a.m. UTC | #22
On Thu, Sep 21, 2017 at 09:42:26AM +0200, Igor Mammedov wrote:
> On Thu, 21 Sep 2017 08:04:55 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
> > On 09/21/2017 05:54 AM, Nikunj A Dadhania wrote:
> > > David Gibson <david@gibson.dropbear.id.au> writes:
> > >   
> > >> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:  
> > >>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>  
> > >>>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:  
> > >>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>>>  
> > >>>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:  
> > >>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>>>>>  
> > >>>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:  
> > >>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>>>>>>>  
> > >>>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:  
> > >>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>>>>>>>>>  
> > >>>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:  
> > >>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
> > >>>>>>>>>>>>>  
> > >>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
> > >>>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
> > >>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
> > >>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
> > >>>>>>>>>>>>>>> (max 2, at the moment)
> > >>>>>>>>>>>>>>>
> > >>>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);  
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
> > >>>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
> > >>>>>>>>>>>>>>  
> > >>>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.  
> > >>>>>>>>>>>>>>
> > >>>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
> > >>>>>>>>>>>>>> it.  That's just asking for confusion.  
> > >>>>>>>>>>>>>
> > >>>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
> > >>>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
> > >>>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.  
> > >>>>>>>>>>>>
> > >>>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
> > >>>>>>>>>>>> not explicitly specified, then ok.  
> > >>>>>>>>>>>
> > >>>>>>>>>>> Should I change the global smp_cores here as well ?  
> > >>>>>>>>>>
> > >>>>>>>>>> I'm pretty uneasy with that option.  
> > >>>>>>>>>
> > >>>>>>>>> Me too.
> > >>>>>>>>>  
> > >>>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
> > >>>>>>>>>> is safe here. An easier to verify option would be to make the generic
> > >>>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
> > >>>>>>>>>> more flexible, possibly based on machine options for max values.
> > >>>>>>>>>>
> > >>>>>>>>>> That might still be more trouble than its worth.  
> > >>>>>>>>>
> > >>>>>>>>> I think the current approach is the simplest and less intrusive, as we
> > >>>>>>>>> are handling a case where user has not bothered to provide a detailed
> > >>>>>>>>> topology, the best we can do is create single threaded cores equal to
> > >>>>>>>>> number of cores.  
> > >>>>>>>>
> > >>>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
> > >>>>>>>> chip in all cases is just not ok.  Add an error message if the
> > >>>>>>>> topology isn't workable for powernv by all means.  But users having to
> > >>>>>>>> use a longer command line is better than breaking basic assumptions
> > >>>>>>>> about what numbers reflect what topology.  
> > >>>>>>>
> > >>>>>>> Sorry to ask again, as I am still not convinced, we do similar
> > >>>>>>> adjustment in spapr where the user did not provide the number of cores,
> > >>>>>>> but qemu assumes them as single threaded cores and created
> > >>>>>>> cores(boot_cores_nr) that were not same as smp_cores ?  
> > >>>>>>
> > >>>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
> > >>>>>> topology, and it certainly doesn't assume they're single threaded.  
> > >>>>>
> > >>>>> When we start a TCG guest and user provides following commandline, e.g.
> > >>>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
> > >>>>> with 4 cores, each having 1 thread.  
> > >>>>
> > >>>> Ok.. and what's the problem with that behaviour on powernv?  
> > >>>
> > >>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
> > >>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
> > >>> this:
> > >>>
> > >>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> > >>>
> > >>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
> > >>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
> > >>> was 1), which is wrong as per the command-line that was provided.  
> > >>
> > >> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
> > >> thread.  If you can't supply 4 sockets you should error, but you
> > >> shouldn't go and change the number of cores per socket.  
> > > 
> > > OK, that makes sense now. And I do see that smp_cpus is 4 in the above
> > > case. Now looking more into it, i see that powernv has something called
> > > "num_chips", isnt this same as sockets ? Do we need num_chips separately?  
> > 
> > yes that would do for cpus, but how do we retrieve the number of 
> > sockets ? I don't see a smp_sockets. 
> I'd suggest to rewrite QEMU again :)
> 
> more exactly, -smp parsing is global and sometimes doesn't suite
> target device model/machine.
> Idea was to make it's options machine properties to get rid of globals
> and then let leaf machine redefine parsing behaviour.
> here is Drew's take on it:
> 
> [Qemu-devel] [PATCH RFC 00/16] Rework SMP parameters
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg376961.html
> 
> considering there weren't pressing need, the series has been pushed
> to the end of TODO list. Maybe you can revive it and make work for
> pnv and other machines.

Right, making the core smp parsing more flexible might be good.
Cédric Le Goater Sept. 22, 2017, 10:52 a.m. UTC | #23
On 09/22/2017 12:08 PM, David Gibson wrote:
> On Thu, Sep 21, 2017 at 08:04:55AM +0200, Cédric Le Goater wrote:
>> On 09/21/2017 05:54 AM, Nikunj A Dadhania wrote:
>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>
>>>> On Wed, Sep 20, 2017 at 12:48:55PM +0530, Nikunj A Dadhania wrote:
>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>
>>>>>> On Wed, Sep 20, 2017 at 12:10:48PM +0530, Nikunj A Dadhania wrote:
>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>
>>>>>>>> On Wed, Sep 20, 2017 at 10:43:19AM +0530, Nikunj A Dadhania wrote:
>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>
>>>>>>>>>> On Wed, Sep 20, 2017 at 09:50:24AM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>
>>>>>>>>>>>> On Fri, Sep 15, 2017 at 02:39:16PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Fri, Sep 15, 2017 at 01:53:15PM +0530, Nikunj A Dadhania wrote:
>>>>>>>>>>>>>>> David Gibson <david@gibson.dropbear.id.au> writes:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I thought, I am doing the same here for PowerNV, number of online cores
>>>>>>>>>>>>>>>>> is equal to initial online vcpus / threads per core
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>    int boot_cores_nr = smp_cpus / smp_threads;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Only difference that I see in PowerNV is that we have multiple chips
>>>>>>>>>>>>>>>>> (max 2, at the moment)
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>         cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> This doesn't make sense to me.  Cores per chip should *always* equal
>>>>>>>>>>>>>>>> smp_cores, you shouldn't need another calculation for it.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> And in case user has provided sane smp_cores, we use it.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> If smp_cores isn't sane, you should simply reject it, not try to fix
>>>>>>>>>>>>>>>> it.  That's just asking for confusion.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> This is the case where the user does not provide a topology(which is a
>>>>>>>>>>>>>>> valid scenario), not sure we should reject it. So qemu defaults
>>>>>>>>>>>>>>> smp_cores/smt_threads to 1. I think it makes sense to over-ride.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> If you can find a way to override it by altering smp_cores when it's
>>>>>>>>>>>>>> not explicitly specified, then ok.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Should I change the global smp_cores here as well ?
>>>>>>>>>>>>
>>>>>>>>>>>> I'm pretty uneasy with that option.
>>>>>>>>>>>
>>>>>>>>>>> Me too.
>>>>>>>>>>>
>>>>>>>>>>>> It would take a fair bit of checking to ensure that changing smp_cores
>>>>>>>>>>>> is safe here. An easier to verify option would be to make the generic
>>>>>>>>>>>> logic which splits up an unspecified -smp N into cores and sockets
>>>>>>>>>>>> more flexible, possibly based on machine options for max values.
>>>>>>>>>>>>
>>>>>>>>>>>> That might still be more trouble than its worth.
>>>>>>>>>>>
>>>>>>>>>>> I think the current approach is the simplest and less intrusive, as we
>>>>>>>>>>> are handling a case where user has not bothered to provide a detailed
>>>>>>>>>>> topology, the best we can do is create single threaded cores equal to
>>>>>>>>>>> number of cores.
>>>>>>>>>>
>>>>>>>>>> No, sorry.  Having smp_cores not correspond to the number of cores per
>>>>>>>>>> chip in all cases is just not ok.  Add an error message if the
>>>>>>>>>> topology isn't workable for powernv by all means.  But users having to
>>>>>>>>>> use a longer command line is better than breaking basic assumptions
>>>>>>>>>> about what numbers reflect what topology.
>>>>>>>>>
>>>>>>>>> Sorry to ask again, as I am still not convinced, we do similar
>>>>>>>>> adjustment in spapr where the user did not provide the number of cores,
>>>>>>>>> but qemu assumes them as single threaded cores and created
>>>>>>>>> cores(boot_cores_nr) that were not same as smp_cores ?
>>>>>>>>
>>>>>>>> What?  boot_cores_nr has absolutely nothing to do with adjusting the
>>>>>>>> topology, and it certainly doesn't assume they're single threaded.
>>>>>>>
>>>>>>> When we start a TCG guest and user provides following commandline, e.g.
>>>>>>> "-smp 4", smt_threads is set to 1 by default in vl.c. So the guest boots
>>>>>>> with 4 cores, each having 1 thread.
>>>>>>
>>>>>> Ok.. and what's the problem with that behaviour on powernv?
>>>>>
>>>>> As smp_thread defaults to 1 in vl.c, similarly smp_cores also has the
>>>>> default value of 1 in vl.c. In powernv, we were setting nr-cores like
>>>>> this:
>>>>>
>>>>>         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
>>>>>
>>>>> Even when there were multiple cpus (-smp 4), when the guest boots up, we
>>>>> just get one core (i.e. smp_cores was 1) with single thread(smp_threads
>>>>> was 1), which is wrong as per the command-line that was provided.
>>>>
>>>> Right, so, -smp 4 defaults to 4 sockets, each with 1 core of 1
>>>> thread.  If you can't supply 4 sockets you should error, but you
>>>> shouldn't go and change the number of cores per socket.
>>>
>>> OK, that makes sense now. And I do see that smp_cpus is 4 in the above
>>> case. Now looking more into it, i see that powernv has something called
>>> "num_chips", isnt this same as sockets ? Do we need num_chips separately?
>>
>> yes that would do for cpus, but how do we retrieve the number of 
>> sockets ? I don't see a smp_sockets. 
> 
> 	# sockets = smp_cpus / smp_threads / smp_cores
> 
> Or, if you want the maximum possible number of sockets (for a fully
> populated system)
> 	# sockets = max_cpus / smp_threads / smp_cores
ok. that would do for a default setting. Sorry for the noise.

>> If we start looking at such issues, we should also take into account 
>> memory distribution :
>>
>>       -numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]
>>
>> would allow us to define a set of cpus per node, cpus should be evenly 
>> distributed on the nodes though, and also define memory per node, but 
>> some nodes could be without memory.
> 
> I don't really see what that has to do with anything.  We already have
> ways to assign memory or cpus to specific nodes if we want.

We will see when the needs come if numa options fit the requirements.

C.
diff mbox

Patch

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 9724719..3fbaafb 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -642,7 +642,7 @@  static void ppc_powernv_init(MachineState *machine)
     MemoryRegion *ram;
     char *fw_filename;
     long fw_size;
-    int i;
+    int i, cores_per_chip;
     char *chip_typename;
     PCIBus *pbus;
     bool has_gfx = false;
@@ -710,6 +710,22 @@  static void ppc_powernv_init(MachineState *machine)
     }
 
     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
+
+    /* If user has specified number of cores, use it. Otherwise, compute it. */
+    if (smp_cores != 1) {
+        cores_per_chip = smp_cores;
+    } else {
+        cores_per_chip = smp_cpus / (smp_threads * pnv->num_chips);
+    }
+
+    if (smp_cpus != (smp_threads * pnv->num_chips * cores_per_chip)) {
+        error_report("cpu topology not balanced: "
+                     "chips (%u) * cores (%u) * threads (%u) != "
+                     "number of cpus (%u)",
+                     pnv->num_chips, cores_per_chip, smp_threads, smp_cpus);
+        exit(1);
+    }
+
     for (i = 0; i < pnv->num_chips; i++) {
         char chip_name[32];
         Object *chip = object_new(chip_typename);
@@ -728,7 +744,7 @@  static void ppc_powernv_init(MachineState *machine)
         object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
         object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
                                 &error_fatal);
-        object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
+        object_property_set_int(chip, cores_per_chip, "nr-cores", &error_fatal);
         object_property_set_int(chip, 1, "num-phbs", &error_fatal);
         object_property_set_bool(chip, true, "realized", &error_fatal);
     }