
[v8,02/12] s390x/cpu_topology: CPU topology objects and structures

Message ID 20220620140352.39398-3-pmorel@linux.ibm.com (mailing list archive)
State New, archived
Series s390x: CPU Topology

Commit Message

Pierre Morel June 20, 2022, 2:03 p.m. UTC
We use new objects for a dynamic administration of the CPU topology.
The highest-level object in this implementation is the s390 book, and
in this first implementation of CPU topology for S390 we have a single
book.
The book is built as a SYSBUS bridge during the CPU initialization.
Other objects, sockets and cores, will be built after the parsing
of the QEMU -smp argument.

Every object under this single book will be built dynamically,
immediately after a CPU has been realized, if it is needed.
The CPUs will fill the sockets one after the other, according to the
number of cores per socket defined during the smp parsing.

Each CPU inside a socket will be represented by a bit in a 64-bit
unsigned long, set on plug and cleared on unplug of a CPU.

For the S390 CPU topology, threads and cores are merged into
topology cores, and the number of topology cores is the number of
cores multiplied by the number of threads.
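
As an illustration (not part of this patch), the mapping from a
machine-wide core_id to its socket, container origin and mask bit then
looks roughly like this; smp_cores and smp_threads stand for the -smp
values:

/*
 * Sketch only: place a machine-wide core_id in the topology described
 * above.  Bit 63 of the mask represents the first core of a container.
 */
static void sketch_place_core(uint64_t *mask, int core_id,
                              int smp_cores, int smp_threads)
{
    int topology_cores_per_socket = smp_cores * smp_threads;
    int socket_id = core_id / topology_cores_per_socket;
    int origin = 64 * (core_id / 64);   /* first core_id of the container */
    int bit = 63 - (core_id - origin);  /* MSB-first position of this core */

    (void)socket_id;                    /* the socket receiving this core */
    *mask |= 1ULL << bit;               /* set on plug, cleared on unplug */
}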

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
 hw/s390x/meson.build            |   1 +
 hw/s390x/s390-virtio-ccw.c      |   6 +
 include/hw/s390x/cpu-topology.h |  74 ++++++
 target/s390x/cpu.h              |  47 ++++
 5 files changed, 519 insertions(+)
 create mode 100644 hw/s390x/cpu-topology.c
 create mode 100644 include/hw/s390x/cpu-topology.h

Comments

Janosch Frank June 27, 2022, 1:31 p.m. UTC | #1
On 6/20/22 16:03, Pierre Morel wrote:
> We use new objects to have a dynamic administration of the CPU topology.
> The highest level object in this implementation is the s390 book and
> in this first implementation of CPU topology for S390 we have a single
> book.
> The book is built as a SYSBUS bridge during the CPU initialization.
> Other objects, sockets and core will be built after the parsing
> of the QEMU -smp argument.
> 
> Every object under this single book will be build dynamically
> immediately after a CPU has be realized if it is needed.
> The CPU will fill the sockets once after the other, according to the
> number of core per socket defined during the smp parsing.
> 
> Each CPU inside a socket will be represented by a bit in a 64bit
> unsigned long. Set on plug and clear on unplug of a CPU.
> 
> For the S390 CPU topology, thread and cores are merged into
> topology cores and the number of topology cores is the multiplication
> of cores by the numbers of threads.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>

[...]

> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 7d6d01325b..216adfde26 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -565,6 +565,53 @@ typedef union SysIB {
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +        uint8_t reserved1:5;
> +        uint8_t dedicated:1;
> +        uint8_t polarity:2;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} SysIBTl_cpu;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +/* Container type Topology List Entry */
> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} QEMU_PACKED SysIBTl_container;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* Generic Topology List Entry */
> +typedef union SysIBTl_entry {
> +        uint8_t nl;

This union member is unused, isn't it?

> +        SysIBTl_container container;
> +        SysIBTl_cpu cpu;
> +} SysIBTl_entry;
> +
> +#define TOPOLOGY_NR_MAG  6

TOPOLOGY_TOTAL_NR_MAGS ?

> +#define TOPOLOGY_NR_MAG6 0

TOPOLOGY_NR_TLES_MAG6 ?

I'm open to other suggestions but we need to differentiate between the 
number of mag array entries and the number of TLEs in the MAGs.
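
For illustration only (the names are the ones proposed above, so an
assumption, not what the patch currently defines): the *_TOTAL_* constant
sizes the mag[] array, the per-level constants merely index it, and the
bytes stored at those indices are the TLE counts reported per level.

#define TOPOLOGY_TOTAL_NR_MAGS 6
#define TOPOLOGY_NR_TLES_MAG2  4
#define TOPOLOGY_NR_TLES_MAG1  5

/* sketch: fill the counts for a flat one-book topology */
static void sketch_fill_mag(uint8_t mag[TOPOLOGY_TOTAL_NR_MAGS],
                            uint8_t sockets, uint8_t cores_per_socket)
{
    mag[TOPOLOGY_NR_TLES_MAG2] = sockets;           /* level-2 count */
    mag[TOPOLOGY_NR_TLES_MAG1] = cores_per_socket;  /* level-1 count */
}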

> +#define TOPOLOGY_NR_MAG5 1
> +#define TOPOLOGY_NR_MAG4 2
> +#define TOPOLOGY_NR_MAG3 3
> +#define TOPOLOGY_NR_MAG2 4
> +#define TOPOLOGY_NR_MAG1 5

I'd appreciate a \n here.

> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  res0[2];

You're using "reserved" everywhere but now it's "res"?

> +    uint16_t length;
> +    uint8_t  mag[TOPOLOGY_NR_MAG];
> +    uint8_t  res1;
> +    uint8_t  mnest;
> +    uint32_t res2;
> +    SysIBTl_entry tle[0];
> +} SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   /* MMU defines */
>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
Pierre Morel June 28, 2022, 11:08 a.m. UTC | #2
On 6/27/22 15:31, Janosch Frank wrote:
> On 6/20/22 16:03, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highest level object in this implementation is the s390 book and
>> in this first implementation of CPU topology for S390 we have a single
>> book.
>> The book is built as a SYSBUS bridge during the CPU initialization.
>> Other objects, sockets and core will be built after the parsing
>> of the QEMU -smp argument.
>>
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> For the S390 CPU topology, thread and cores are merged into
>> topology cores and the number of topology cores is the multiplication
>> of cores by the numbers of threads.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> 
> [...]
> 
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 7d6d01325b..216adfde26 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +        uint8_t reserved1:5;
>> +        uint8_t dedicated:1;
>> +        uint8_t polarity:2;
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} SysIBTl_cpu;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +/* Container type Topology List Entry */
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} QEMU_PACKED SysIBTl_container;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* Generic Topology List Entry */
>> +typedef union SysIBTl_entry {
>> +        uint8_t nl;
> 
> This union member is unused, isn't it?

Yes, forgot to remove it.
will do.

> 
>> +        SysIBTl_container container;
>> +        SysIBTl_cpu cpu;
>> +} SysIBTl_entry;
>> +
>> +#define TOPOLOGY_NR_MAG  6
> 
> TOPOLOGY_TOTAL_NR_MAGS ?

OK

> 
>> +#define TOPOLOGY_NR_MAG6 0
> 
> TOPOLOGY_NR_TLES_MAG6 ?
> 
> I'm open to other suggestions but we need to differentiate between the 
> number of mag array entries and the number of TLEs in the MAGs.

it is OK for me

> 
>> +#define TOPOLOGY_NR_MAG5 1
>> +#define TOPOLOGY_NR_MAG4 2
>> +#define TOPOLOGY_NR_MAG3 3
>> +#define TOPOLOGY_NR_MAG2 4
>> +#define TOPOLOGY_NR_MAG1 5
> 
> I'd appreciate a \n here.

ok

> 
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  res0[2];
> 
> You're using "reserved" everywhere but now it's "rev"?

ok, will use reserved overall

> 
>> +    uint16_t length;
>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>> +    uint8_t  res1;
>> +    uint8_t  mnest;
>> +    uint32_t res2;
>> +    SysIBTl_entry tle[0];
>> +} SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table 
>> origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group 
>> control           */
> 

Thanks,
Pierre
Pierre Morel June 29, 2022, 3:25 p.m. UTC | #3
On 6/27/22 15:31, Janosch Frank wrote:
> On 6/20/22 16:03, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highest level object in this implementation is the s390 book and
>> in this first implementation of CPU topology for S390 we have a single
>> book.
>> The book is built as a SYSBUS bridge during the CPU initialization.
>> Other objects, sockets and core will be built after the parsing
>> of the QEMU -smp argument.
>>
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> For the S390 CPU topology, thread and cores are merged into
>> topology cores and the number of topology cores is the multiplication
>> of cores by the numbers of threads.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> 
> [...]
> 
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 7d6d01325b..216adfde26 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +        uint8_t reserved1:5;
>> +        uint8_t dedicated:1;
>> +        uint8_t polarity:2;
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} SysIBTl_cpu;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +/* Container type Topology List Entry */
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} QEMU_PACKED SysIBTl_container;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* Generic Topology List Entry */
>> +typedef union SysIBTl_entry {
>> +        uint8_t nl;
> 
> This union member is unused, isn't it?
> 
>> +        SysIBTl_container container;
>> +        SysIBTl_cpu cpu;
>> +} SysIBTl_entry;
>> +
>> +#define TOPOLOGY_NR_MAG  6
> 
> TOPOLOGY_TOTAL_NR_MAGS ?
> 
>> +#define TOPOLOGY_NR_MAG6 0
> 
> TOPOLOGY_NR_TLES_MAG6 ?
> 
> I'm open to other suggestions but we need to differentiate between the 
> number of mag array entries and the number of TLEs in the MAGs.


enum {
        TOPOLOGY_MAG6 = 0,
        TOPOLOGY_MAG5 = 1,
        TOPOLOGY_MAG4 = 2,
        TOPOLOGY_MAG3 = 3,
        TOPOLOGY_MAG2 = 4,
        TOPOLOGY_MAG1 = 5,
        TOPOLOGY_TOTAL_MAGS = 6,
};


or an enum with TOPOLOGY_NR_TLES_MAGx?

> 
>> +#define TOPOLOGY_NR_MAG5 1
>> +#define TOPOLOGY_NR_MAG4 2
>> +#define TOPOLOGY_NR_MAG3 3
>> +#define TOPOLOGY_NR_MAG2 4
>> +#define TOPOLOGY_NR_MAG1 5
> 
> I'd appreciate a \n here.

OK

> 
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  res0[2];
> 
> You're using "reserved" everywhere but now it's "rev"?

OK I will keep reserved

> 
>> +    uint16_t length;
>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>> +    uint8_t  res1;
>> +    uint8_t  mnest;
>> +    uint32_t res2;
>> +    SysIBTl_entry tle[0];
>> +} SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table 
>> origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group 
>> control           */
> 
>
Janosch Frank July 4, 2022, 11:47 a.m. UTC | #4
On 6/29/22 17:25, Pierre Morel wrote:
> 
> 
> On 6/27/22 15:31, Janosch Frank wrote:
>> On 6/20/22 16:03, Pierre Morel wrote:
>>> We use new objects to have a dynamic administration of the CPU topology.
>>> The highest level object in this implementation is the s390 book and
>>> in this first implementation of CPU topology for S390 we have a single
>>> book.
>>> The book is built as a SYSBUS bridge during the CPU initialization.
>>> Other objects, sockets and core will be built after the parsing
>>> of the QEMU -smp argument.
>>>
>>> Every object under this single book will be build dynamically
>>> immediately after a CPU has be realized if it is needed.
>>> The CPU will fill the sockets once after the other, according to the
>>> number of core per socket defined during the smp parsing.
>>>
>>> Each CPU inside a socket will be represented by a bit in a 64bit
>>> unsigned long. Set on plug and clear on unplug of a CPU.
>>>
>>> For the S390 CPU topology, thread and cores are merged into
>>> topology cores and the number of topology cores is the multiplication
>>> of cores by the numbers of threads.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>
>> [...]
>>
>>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>>> index 7d6d01325b..216adfde26 100644
>>> --- a/target/s390x/cpu.h
>>> +++ b/target/s390x/cpu.h
>>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>>    } SysIB;
>>>    QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>> +/* CPU type Topology List Entry */
>>> +typedef struct SysIBTl_cpu {
>>> +        uint8_t nl;
>>> +        uint8_t reserved0[3];
>>> +        uint8_t reserved1:5;
>>> +        uint8_t dedicated:1;
>>> +        uint8_t polarity:2;
>>> +        uint8_t type;
>>> +        uint16_t origin;
>>> +        uint64_t mask;
>>> +} SysIBTl_cpu;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>>> +
>>> +/* Container type Topology List Entry */
>>> +typedef struct SysIBTl_container {
>>> +        uint8_t nl;
>>> +        uint8_t reserved[6];
>>> +        uint8_t id;
>>> +} QEMU_PACKED SysIBTl_container;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>> +
>>> +/* Generic Topology List Entry */
>>> +typedef union SysIBTl_entry {
>>> +        uint8_t nl;
>>
>> This union member is unused, isn't it?
>>
>>> +        SysIBTl_container container;
>>> +        SysIBTl_cpu cpu;
>>> +} SysIBTl_entry;
>>> +
>>> +#define TOPOLOGY_NR_MAG  6
>>
>> TOPOLOGY_TOTAL_NR_MAGS ?
>>
>>> +#define TOPOLOGY_NR_MAG6 0
>>
>> TOPOLOGY_NR_TLES_MAG6 ?
>>
>> I'm open to other suggestions but we need to differentiate between the
>> number of mag array entries and the number of TLEs in the MAGs.
> 
> 
> typedef enum {
>           TOPOLOGY_MAG6 = 0,
>           TOPOLOGY_MAG5 = 1,
>           TOPOLOGY_MAG4 = 2,
>           TOPOLOGY_MAG3 = 3,
>           TOPOLOGY_MAG2 = 4,
>           TOPOLOGY_MAG1 = 5,
>           TOPOLOGY_TOTAL_MAGS = 6,
> };
> 
> 
> oder enum with TOPOLOGY_NR_TLES_MAGx ?

I'd stick with the shorter first variant.

> 
>>
>>> +#define TOPOLOGY_NR_MAG5 1
>>> +#define TOPOLOGY_NR_MAG4 2
>>> +#define TOPOLOGY_NR_MAG3 3
>>> +#define TOPOLOGY_NR_MAG2 4
>>> +#define TOPOLOGY_NR_MAG1 5
>>
>> I'd appreciate a \n here.
> 
> OK
> 
>>
>>> +/* Configuration topology */
>>> +typedef struct SysIB_151x {
>>> +    uint8_t  res0[2];
>>
>> You're using "reserved" everywhere but now it's "rev"?
> 
> OK I will keep reserved
> 
>>
>>> +    uint16_t length;
>>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>>> +    uint8_t  res1;
>>> +    uint8_t  mnest;
>>> +    uint32_t res2;
>>> +    SysIBTl_entry tle[0];
>>> +} SysIB_151x;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>>> +
>>>    /* MMU defines */
>>>    #define ASCE_ORIGIN           (~0xfffULL) /* segment table
>>> origin             */
>>>    #define ASCE_SUBSPACE         0x200       /* subspace group
>>> control           */
>>
>>
>
Pierre Morel July 4, 2022, 2:51 p.m. UTC | #5
On 7/4/22 13:47, Janosch Frank wrote:
> On 6/29/22 17:25, Pierre Morel wrote:
>>
>>
>> On 6/27/22 15:31, Janosch Frank wrote:
>>> On 6/20/22 16:03, Pierre Morel wrote:
>>>> We use new objects to have a dynamic administration of the CPU 
>>>> topology.
>>>> The highest level object in this implementation is the s390 book and
>>>> in this first implementation of CPU topology for S390 we have a single
>>>> book.
>>>> The book is built as a SYSBUS bridge during the CPU initialization.
>>>> Other objects, sockets and core will be built after the parsing
>>>> of the QEMU -smp argument.
>>>>
>>>> Every object under this single book will be build dynamically
>>>> immediately after a CPU has be realized if it is needed.
>>>> The CPU will fill the sockets once after the other, according to the
>>>> number of core per socket defined during the smp parsing.
>>>>
>>>> Each CPU inside a socket will be represented by a bit in a 64bit
>>>> unsigned long. Set on plug and clear on unplug of a CPU.
>>>>
>>>> For the S390 CPU topology, thread and cores are merged into
>>>> topology cores and the number of topology cores is the multiplication
>>>> of cores by the numbers of threads.
>>>>
>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>
>>> [...]
>>>
>>>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>>>> index 7d6d01325b..216adfde26 100644
>>>> --- a/target/s390x/cpu.h
>>>> +++ b/target/s390x/cpu.h
>>>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>>>    } SysIB;
>>>>    QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>>> +/* CPU type Topology List Entry */
>>>> +typedef struct SysIBTl_cpu {
>>>> +        uint8_t nl;
>>>> +        uint8_t reserved0[3];
>>>> +        uint8_t reserved1:5;
>>>> +        uint8_t dedicated:1;
>>>> +        uint8_t polarity:2;
>>>> +        uint8_t type;
>>>> +        uint16_t origin;
>>>> +        uint64_t mask;
>>>> +} SysIBTl_cpu;
>>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>>>> +
>>>> +/* Container type Topology List Entry */
>>>> +typedef struct SysIBTl_container {
>>>> +        uint8_t nl;
>>>> +        uint8_t reserved[6];
>>>> +        uint8_t id;
>>>> +} QEMU_PACKED SysIBTl_container;
>>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>>> +
>>>> +/* Generic Topology List Entry */
>>>> +typedef union SysIBTl_entry {
>>>> +        uint8_t nl;
>>>
>>> This union member is unused, isn't it?
>>>
>>>> +        SysIBTl_container container;
>>>> +        SysIBTl_cpu cpu;
>>>> +} SysIBTl_entry;
>>>> +
>>>> +#define TOPOLOGY_NR_MAG  6
>>>
>>> TOPOLOGY_TOTAL_NR_MAGS ?
>>>
>>>> +#define TOPOLOGY_NR_MAG6 0
>>>
>>> TOPOLOGY_NR_TLES_MAG6 ?
>>>
>>> I'm open to other suggestions but we need to differentiate between the
>>> number of mag array entries and the number of TLEs in the MAGs.
>>
>>
>> typedef enum {
>>           TOPOLOGY_MAG6 = 0,
>>           TOPOLOGY_MAG5 = 1,
>>           TOPOLOGY_MAG4 = 2,
>>           TOPOLOGY_MAG3 = 3,
>>           TOPOLOGY_MAG2 = 4,
>>           TOPOLOGY_MAG1 = 5,
>>           TOPOLOGY_TOTAL_MAGS = 6,
>> };
>>
>>
>> oder enum with TOPOLOGY_NR_TLES_MAGx ?
> 
> I'd stick with the shorter first variant.


OK, thanks

> 
>>
>>>
>>>> +#define TOPOLOGY_NR_MAG5 1
>>>> +#define TOPOLOGY_NR_MAG4 2
>>>> +#define TOPOLOGY_NR_MAG3 3
>>>> +#define TOPOLOGY_NR_MAG2 4
>>>> +#define TOPOLOGY_NR_MAG1 5
>>>
>>> I'd appreciate a \n here.
>>
>> OK
>>
>>>
>>>> +/* Configuration topology */
>>>> +typedef struct SysIB_151x {
>>>> +    uint8_t  res0[2];
>>>
>>> You're using "reserved" everywhere but now it's "rev"?
>>
>> OK I will keep reserved
>>
>>>
>>>> +    uint16_t length;
>>>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>>>> +    uint8_t  res1;
>>>> +    uint8_t  mnest;
>>>> +    uint32_t res2;
>>>> +    SysIBTl_entry tle[0];
>>>> +} SysIB_151x;
>>>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>>>> +
>>>>    /* MMU defines */
>>>>    #define ASCE_ORIGIN           (~0xfffULL) /* segment table
>>>> origin             */
>>>>    #define ASCE_SUBSPACE         0x200       /* subspace group
>>>> control           */
>>>
>>>
>>
>
Janis Schoetterl-Glausch July 12, 2022, 3:40 p.m. UTC | #6
On 6/20/22 16:03, Pierre Morel wrote:
> We use new objects to have a dynamic administration of the CPU topology.
> The highest level object in this implementation is the s390 book and
> in this first implementation of CPU topology for S390 we have a single
> book.
> The book is built as a SYSBUS bridge during the CPU initialization.
> Other objects, sockets and core will be built after the parsing
> of the QEMU -smp argument.
> 
> Every object under this single book will be build dynamically
> immediately after a CPU has be realized if it is needed.
> The CPU will fill the sockets once after the other, according to the
> number of core per socket defined during the smp parsing.
> 
> Each CPU inside a socket will be represented by a bit in a 64bit
> unsigned long. Set on plug and clear on unplug of a CPU.
> 
> For the S390 CPU topology, thread and cores are merged into
> topology cores and the number of topology cores is the multiplication
> of cores by the numbers of threads.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>  hw/s390x/meson.build            |   1 +
>  hw/s390x/s390-virtio-ccw.c      |   6 +
>  include/hw/s390x/cpu-topology.h |  74 ++++++
>  target/s390x/cpu.h              |  47 ++++
>  5 files changed, 519 insertions(+)
>  create mode 100644 hw/s390x/cpu-topology.c
>  create mode 100644 include/hw/s390x/cpu-topology.h
> 
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> new file mode 100644
> index 0000000000..0fd6f08084
> --- /dev/null
> +++ b/hw/s390x/cpu-topology.c
> @@ -0,0 +1,391 @@
> +/*
> + * CPU Topology
> + *
> + * Copyright 2022 IBM Corp.

Should be Copyright IBM Corp. 2022, and maybe even have a year range.

> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> +
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +#include "hw/sysbus.h"
> +#include "hw/s390x/cpu-topology.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/boards.h"
> +#include "qemu/typedefs.h"
> +#include "target/s390x/cpu.h"
> +#include "hw/s390x/s390-virtio-ccw.h"
> +
> +/*
> + * s390_create_cores:
> + * @ms: Machine state
> + * @socket: the socket on which to create the core set
> + * @origin: the origin offset of the first core of the set
> + * @errp: Error pointer
> + *
> + * returns a pointer to the created S390TopologyCores structure
> + *
> + * On error: return NULL
> + */
> +static S390TopologyCores *s390_create_cores(MachineState *ms,
> +                                            S390TopologySocket *socket,
> +                                            int origin, Error **errp)
> +{
> +    DeviceState *dev;
> +    S390TopologyCores *cores;
> +
> +    if (socket->bus->num_children >= ms->smp.cores * ms->smp.threads) {
> +        error_setg(errp, "Unable to create more cores.");
> +        return NULL;
> +    }

Why/How can this happen?
The "location" of the CPU is a function of core_id and the same CPU should not be added twice.
If it's to enforce a limit on the smp arguments that should happen earlier in my opinion.
If it's necessary, you could also make the message more verbose and add ", maximum number reached".
> +
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
> +    qdev_realize_and_unref(dev, socket->bus, &error_fatal);

As a result of this, the order of cores in the socket bus is the creation order, correct?
So newest first and not ordered by the origin (since we can hot plug CPUs), correct?
> +
> +    cores = S390_TOPOLOGY_CORES(dev);
> +    cores->origin = origin;

I must admit that I haven't fully grokked the qemu object model, yet, but I'd be more comfortable
if you unref'ed cores after you set the origin.
Does the socket bus own the object after you unref it? Does it then make sense to return cores
after unref'ing it?
But then we don't support CPU unplug, so the object shouldn't just vanish.
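
For reference, this is how I read the reference counting in that pattern
(the calls are the ones from the patch; the comments are only my
understanding of the qdev semantics, so take them with a grain of salt):

dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
/*
 * The caller holds the only reference here.  Realizing the device on the
 * socket bus makes the bus the parent and gives it its own reference;
 * the caller's reference is dropped on return, success or not, so
 * afterwards the bus is what keeps the object alive.
 */
qdev_realize_and_unref(dev, socket->bus, &error_fatal);

cores = S390_TOPOLOGY_CORES(dev);
cores->origin = origin;   /* still safe: the bus keeps a reference, and
                           * unplug is not supported by this series */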

> +    socket->cnt += 1;

cnt++ to be consistent with create_socket below.
> +
> +    return cores;
> +}
> +
> +/*
> + * s390_create_socket:
> + * @ms: Machine state
> + * @book: the book on which to create the socket
> + * @id: the socket id
> + * @errp: Error pointer
> + *
> + * returns a pointer to the created S390TopologySocket structure
> + *
> + * On error: return NULL
> + */
> +static S390TopologySocket *s390_create_socket(MachineState *ms,
> +                                              S390TopologyBook *book,
> +                                              int id, Error **errp)
> +{

Same questions/comments as above.

> +    DeviceState *dev;
> +    S390TopologySocket *socket;
> +
> +    if (book->bus->num_children >= ms->smp.sockets) {
> +        error_setg(errp, "Unable to create more sockets.");
> +        return NULL;
> +    }
> +
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
> +    qdev_realize_and_unref(dev, book->bus, &error_fatal);
> +
> +    socket = S390_TOPOLOGY_SOCKET(dev);
> +    socket->socket_id = id;
> +    book->cnt++;
> +
> +    return socket;
> +}
> +
> +/*
> + * s390_get_cores:
> + * @ms: Machine state
> + * @socket: the socket to search into
> + * @origin: the origin specified for the S390TopologyCores
> + * @errp: Error pointer
> + *
> + * returns a pointer to a S390TopologyCores structure within a socket having
> + * the specified origin.
> + * First search if the socket is already containing the S390TopologyCores
> + * structure and if not create one with this origin.
> + */
> +static S390TopologyCores *s390_get_cores(MachineState *ms,
> +                                         S390TopologySocket *socket,
> +                                         int origin, Error **errp)
> +{
> +    S390TopologyCores *cores;
> +    BusChild *kid;
> +
> +    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
> +        cores = S390_TOPOLOGY_CORES(kid->child);
> +        if (cores->origin == origin) {
> +            return cores;
> +        }
> +    }
> +    return s390_create_cores(ms, socket, origin, errp);

I think calling create here is unintuitive.
You only use get_cores once, when creating a new CPU; I think doing

    cores = s390_get_cores(ms, socket, origin, errp);
    if (!cores) {
        cores = s390_create_cores(...);
    }
    if (!cores) {
        return false;
    }

is more straightforward and readable.
> +}
> +
> +/*
> + * s390_get_socket:
> + * @ms: Machine state
> + * @book: The book to search into
> + * @socket_id: the identifier of the socket to search for
> + * @errp: Error pointer
> + *
> + * returns a pointer to a S390TopologySocket structure within a book having
> + * the specified socket_id.
> + * First search if the book is already containing the S390TopologySocket
> + * structure and if not create one with this socket_id.
> + */
> +static S390TopologySocket *s390_get_socket(MachineState *ms,
> +                                           S390TopologyBook *book,
> +                                           int socket_id, Error **errp)
> +{
> +    S390TopologySocket *socket;
> +    BusChild *kid;
> +
> +    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
> +        socket = S390_TOPOLOGY_SOCKET(kid->child);
> +        if (socket->socket_id == socket_id) {
> +            return socket;
> +        }
> +    }
> +    return s390_create_socket(ms, book, socket_id, errp);

As above.

> +}
> +
> +/*
> + * s390_topology_new_cpu:
> + * @core_id: the core ID is machine wide
> + *
> + * We have a single book returned by s390_get_topology(),
> + * then we build the hierarchy on demand.
> + * Note that we do not destroy the hierarchy on error creating
> + * an entry in the topology, we just keep it empty.
> + * We do not need to worry about not finding a topology level
> + * entry this would have been caught during smp parsing.
> + */
> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
> +{
> +    S390TopologyBook *book;
> +    S390TopologySocket *socket;
> +    S390TopologyCores *cores;
> +    int nb_cores_per_socket;

num_cores_per_socket instead?

> +    int origin, bit;
> +
> +    book = s390_get_topology();
> +
> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;

We don't support the multithreading facility, do we?
So, I think we should assert smp.threads == 1 somewhere.
In any case I think the correct expression would round the threads up to the next power of 2,
because the core_id has the thread id in the lower bits, but threads per core doesn't need to be
a power of 2 according to the architecture.
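
Roughly (untested sketch; I assume pow2ceil() from "qemu/host-utils.h" is
the right helper here):

/* until the multithreading facility is supported ... */
assert(ms->smp.threads == 1);
/*
 * ... and if threads are ever accepted, round them up to a power of 2,
 * since core_id reserves a power-of-2 aligned block of low bits for the
 * thread id even when smp.threads is not a power of 2.
 */
nb_cores_per_socket = ms->smp.cores * pow2ceil(ms->smp.threads);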

> +
> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, errp);
> +    if (!socket) {
> +        return false;
> +    }
> +
> +    /*
> +     * At the core level, each CPU is represented by a bit in a 64bit
> +     * unsigned long. Set on plug and clear on unplug of a CPU.
> +     * The firmware assume that all CPU in the core description have the same
> +     * type, polarization and are all dedicated or shared.
> +     * In the case a socket contains CPU with different type, polarization
> +     * or dedication then they will be defined in different CPU containers.
> +     * Currently we assume all CPU are identical and the only reason to have
> +     * several S390TopologyCores inside a socket is to have more than 64 CPUs
> +     * in that case the origin field, representing the offset of the first CPU
> +     * in the CPU container allows to represent up to the maximal number of
> +     * CPU inside several CPU containers inside the socket container.
> +     */
> +    origin = 64 * (core_id / 64);
> +
> +    cores = s390_get_cores(ms, socket, origin, errp);
> +    if (!cores) {
> +        return false;
> +    }
> +
> +    bit = 63 - (core_id - origin);
> +    set_bit(bit, &cores->mask);
> +    cores->origin = origin;

This is redundant, origin is already set.
Also I think you should generally pass the core_id and not the origin.
Then on construction you can also set the bit.
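
I.e. something like this (sketch only, reusing the helpers the patch
already uses):

static S390TopologyCores *s390_create_cores(S390TopologySocket *socket,
                                            int core_id)
{
    DeviceState *dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
    S390TopologyCores *cores;

    qdev_realize_and_unref(dev, socket->bus, &error_fatal);

    cores = S390_TOPOLOGY_CORES(dev);
    cores->origin = 64 * (core_id / 64);
    /* set the plug bit for this core right at construction time */
    set_bit(63 - (core_id - cores->origin), &cores->mask);
    socket->cnt++;

    return cores;
}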

> +
> +    return true;
> +}
> +
> +/*
> + * Setting the first topology: 1 book, 1 socket
> + * This is enough for 64 cores if the topology is flat (single socket)
> + */
> +void s390_topology_setup(MachineState *ms)
> +{
> +    DeviceState *dev;
> +
> +    /* Create BOOK bridge device */
> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
> +    object_property_add_child(qdev_get_machine(),
> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));

Why add it to the machine instead of directly using a static?
So it's visible to the user via info qtree or something?
Would that even be the appropriate location to show that?

> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
> +}
> +
> +S390TopologyBook *s390_get_topology(void)
> +{
> +    static S390TopologyBook *book;
> +
> +    if (!book) {
> +        book = S390_TOPOLOGY_BOOK(
> +            object_resolve_path(TYPE_S390_TOPOLOGY_BOOK, NULL));
> +        assert(book != NULL);
> +    }
> +
> +    return book;
> +}
> +
> +/* --- CORES Definitions --- */
> +
> +static Property s390_topology_cores_properties[] = {
> +    DEFINE_PROP_BOOL("dedicated", S390TopologyCores, dedicated, false),
> +    DEFINE_PROP_UINT8("polarity", S390TopologyCores, polarity,
> +                      S390_TOPOLOGY_POLARITY_H),
> +    DEFINE_PROP_UINT8("cputype", S390TopologyCores, cputype,
> +                      S390_TOPOLOGY_CPU_TYPE),
> +    DEFINE_PROP_UINT16("origin", S390TopologyCores, origin, 0),
> +    DEFINE_PROP_UINT64("mask", S390TopologyCores, mask, 0),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void cpu_cores_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    device_class_set_props(dc, s390_topology_cores_properties);
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    dc->bus_type = TYPE_S390_TOPOLOGY_SOCKET_BUS;
> +    dc->desc = "topology cpu entry";
> +}
> +
> +static const TypeInfo cpu_cores_info = {
> +    .name          = TYPE_S390_TOPOLOGY_CORES,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(S390TopologyCores),
> +    .class_init    = cpu_cores_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },

Why implement the hotplug interface? That is not actually supported, is it?
> +        { }
> +    }
> +};
> +
> +static char *socket_bus_get_dev_path(DeviceState *dev)
> +{
> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
> +    DeviceState *book = dev->parent_bus->parent;
> +    char *id = qdev_get_dev_path(book);
> +    char *ret;
> +
> +    if (id) {
> +        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
> +        g_free(id);
> +    } else {
> +        ret = g_strdup_printf("_:%02d", socket->socket_id);

How can this case occur? Sockets get attached to the book bus immediately after creation, correct?

> +    }
> +
> +    return ret;
> +}
> +
> +static void socket_bus_class_init(ObjectClass *oc, void *data)
> +{
> +    BusClass *k = BUS_CLASS(oc);
> +
> +    k->get_dev_path = socket_bus_get_dev_path;
> +    k->max_dev = S390_MAX_SOCKETS;

This is the bus the cores are attached to, correct?
Is this constant badly named, or should this be MAX_CORES (which doesn't exist)?
How does this limit get enforced?
Why is there a limit in the first place? I don't see one defined by STSI, other than having to fit in a u8.
> +}
> +
> +static const TypeInfo socket_bus_info = {
> +    .name = TYPE_S390_TOPOLOGY_SOCKET_BUS,
> +    .parent = TYPE_BUS,
> +    .instance_size = 0,

After a bit of grepping it seems to me that omitting that field is more common than setting it to 0.

> +    .class_init = socket_bus_class_init,
> +};
> +
> +static void s390_socket_device_realize(DeviceState *dev, Error **errp)
> +{
> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
> +    BusState *bus;
> +
> +    bus = qbus_new(TYPE_S390_TOPOLOGY_SOCKET_BUS, dev,
> +                   TYPE_S390_TOPOLOGY_SOCKET_BUS);
> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
> +    socket->bus = bus;
> +}
> +
> +static void socket_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> +    dc->bus_type = TYPE_S390_TOPOLOGY_BOOK_BUS;
> +    dc->realize = s390_socket_device_realize;
> +    dc->desc = "topology socket";
> +}
> +
> +static const TypeInfo socket_info = {
> +    .name          = TYPE_S390_TOPOLOGY_SOCKET,
> +    .parent        = TYPE_DEVICE,
> +    .instance_size = sizeof(S390TopologySocket),
> +    .class_init    = socket_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};
> +
> +static char *book_bus_get_dev_path(DeviceState *dev)
> +{
> +    return g_strdup("00");
> +}
> +
> +static void book_bus_class_init(ObjectClass *oc, void *data)
> +{
> +    BusClass *k = BUS_CLASS(oc);
> +
> +    k->get_dev_path = book_bus_get_dev_path;
> +    k->max_dev = S390_MAX_BOOKS;

Same question as for socket_bus_class_init here.

> +}
> +
> +static const TypeInfo book_bus_info = {
> +    .name = TYPE_S390_TOPOLOGY_BOOK_BUS,
> +    .parent = TYPE_BUS,
> +    .instance_size = 0,
> +    .class_init = book_bus_class_init,
> +};
> +
> +static void s390_book_device_realize(DeviceState *dev, Error **errp)
> +{
> +    S390TopologyBook *book = S390_TOPOLOGY_BOOK(dev);
> +    BusState *bus;
> +
> +    bus = qbus_new(TYPE_S390_TOPOLOGY_BOOK_BUS, dev,
> +                   TYPE_S390_TOPOLOGY_BOOK_BUS);
> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
> +    book->bus = bus;
> +}
> +
> +static void book_class_init(ObjectClass *oc, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(oc);
> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
> +
> +    hc->unplug = qdev_simple_device_unplug_cb;
> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> +    dc->realize = s390_book_device_realize;
> +    dc->desc = "topology book";
> +}
> +
> +static const TypeInfo book_info = {
> +    .name          = TYPE_S390_TOPOLOGY_BOOK,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(S390TopologyBook),
> +    .class_init    = book_class_init,
> +    .interfaces = (InterfaceInfo[]) {
> +        { TYPE_HOTPLUG_HANDLER },
> +        { }
> +    }
> +};
> +
> +static void topology_register(void)
> +{
> +    type_register_static(&cpu_cores_info);
> +    type_register_static(&socket_bus_info);
> +    type_register_static(&socket_info);
> +    type_register_static(&book_bus_info);
> +    type_register_static(&book_info);
> +}
> +
> +type_init(topology_register);
> diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
> index feefe0717e..3592fa952b 100644
> --- a/hw/s390x/meson.build
> +++ b/hw/s390x/meson.build
> @@ -2,6 +2,7 @@ s390x_ss = ss.source_set()
>  s390x_ss.add(files(
>    'ap-bridge.c',
>    'ap-device.c',
> +  'cpu-topology.c',
>    'ccw-device.c',
>    'css-bridge.c',
>    'css.c',
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index cc3097bfee..a586875b24 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -43,6 +43,7 @@
>  #include "sysemu/sysemu.h"
>  #include "hw/s390x/pv.h"
>  #include "migration/blocker.h"
> +#include "hw/s390x/cpu-topology.h"
>  
>  static Error *pv_mig_blocker;
>  
> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>      /* initialize possible_cpus */
>      mc->possible_cpu_arch_ids(machine);
>  
> +    s390_topology_setup(machine);
>      for (i = 0; i < machine->smp.cpus; i++) {
>          s390x_new_cpu(machine->cpu_type, i, &error_fatal);
>      }
> @@ -306,6 +308,10 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev,
>      g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
>      ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
>  
> +    if (!s390_topology_new_cpu(ms, cpu->env.core_id, errp)) {
> +        return;
> +    }
> +
>      if (dev->hotplugged) {
>          raise_irq_cpu_hotplug();
>      }
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> new file mode 100644
> index 0000000000..beec61706c
> --- /dev/null
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -0,0 +1,74 @@
> +/*
> + * CPU Topology
> + *
> + * Copyright 2022 IBM Corp.

Same issue as with .c copyright notice.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
> + * your option) any later version. See the COPYING file in the top-level
> + * directory.
> + */
> +#ifndef HW_S390X_CPU_TOPOLOGY_H
> +#define HW_S390X_CPU_TOPOLOGY_H
> +
> +#include "hw/qdev-core.h"
> +#include "qom/object.h"
> +
> +#define S390_TOPOLOGY_CPU_TYPE    0x03

This is the IFL type, right? If so the name should reflect it.
> +
> +#define S390_TOPOLOGY_POLARITY_H  0x00
> +#define S390_TOPOLOGY_POLARITY_VL 0x01
> +#define S390_TOPOLOGY_POLARITY_VM 0x02
> +#define S390_TOPOLOGY_POLARITY_VH 0x03

Why not use an enum?
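
For example (same values as in the patch; the type name is just a
placeholder I made up):

typedef enum S390TopologyPolarity {
    S390_TOPOLOGY_POLARITY_H  = 0x00,
    S390_TOPOLOGY_POLARITY_VL = 0x01,
    S390_TOPOLOGY_POLARITY_VM = 0x02,
    S390_TOPOLOGY_POLARITY_VH = 0x03,
} S390TopologyPolarity;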
> +
> +#define TYPE_S390_TOPOLOGY_CORES "topology cores"

Seems to me that using a - instead of a space is the usual way of doing things.
> +    /*
> +     * Each CPU inside a socket will be represented by a bit in a 64bit
> +     * unsigned long. Set on plug and clear on unplug of a CPU.
> +     * All CPU inside a mask share the same dedicated, polarity and
> +     * cputype values.
> +     * The origin is the offset of the first CPU in a mask.
> +     */
> +struct S390TopologyCores {
> +    DeviceState parent_obj;
> +    int id;
> +    bool dedicated;
> +    uint8_t polarity;
> +    uint8_t cputype;

Why not snake_case for cpu type?

> +    uint16_t origin;
> +    uint64_t mask;
> +    int cnt;

num_cores instead ?

> +};
> +typedef struct S390TopologyCores S390TopologyCores;
> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyCores, S390_TOPOLOGY_CORES)
> +
> +#define TYPE_S390_TOPOLOGY_SOCKET "topology socket"
> +#define TYPE_S390_TOPOLOGY_SOCKET_BUS "socket-bus"
> +struct S390TopologySocket {
> +    DeviceState parent_obj;
> +    BusState *bus;
> +    int socket_id;
> +    int cnt;
> +};
> +typedef struct S390TopologySocket S390TopologySocket;
> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologySocket, S390_TOPOLOGY_SOCKET)
> +#define S390_MAX_SOCKETS 4
> +
> +#define TYPE_S390_TOPOLOGY_BOOK "topology book"
> +#define TYPE_S390_TOPOLOGY_BOOK_BUS "book-bus"
> +struct S390TopologyBook {
> +    SysBusDevice parent_obj;
> +    BusState *bus;
> +    int book_id;
> +    int cnt;
> +};
> +typedef struct S390TopologyBook S390TopologyBook;
> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyBook, S390_TOPOLOGY_BOOK)
> +#define S390_MAX_BOOKS 1
> +
> +S390TopologyBook *s390_init_topology(void);
> +
> +S390TopologyBook *s390_get_topology(void);
> +void s390_topology_setup(MachineState *ms);
> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp);
> +
> +#endif
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 7d6d01325b..216adfde26 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h

I think these definitions should be moved to the STSI patch since they're not used in this one.

> @@ -565,6 +565,53 @@ typedef union SysIB {
>  } SysIB;
>  QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>  
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +        uint8_t reserved1:5;
> +        uint8_t dedicated:1;
> +        uint8_t polarity:2;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} SysIBTl_cpu;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +/* Container type Topology List Entry */
> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} QEMU_PACKED SysIBTl_container;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* Generic Topology List Entry */
> +typedef union SysIBTl_entry {
> +        uint8_t nl;
> +        SysIBTl_container container;
> +        SysIBTl_cpu cpu;
> +} SysIBTl_entry;

I don't like this union; it's only used in SysIB_151x below, and that's
misleading because the entries are packed without padding while the union
members have different sizes.
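
For illustration, if tle[] were exposed as a flat area, a walker could
advance entry by entry like this (sketch; it assumes CPU TLEs are the
entries with nl == 0, which is my reading of the architecture, and relies
on the 8/16 byte sizes asserted above):

static uint8_t *sketch_next_tle(uint8_t *entry)
{
    uint8_t nl = *entry;   /* first byte of both TLE layouts */

    return entry + (nl ? sizeof(SysIBTl_container) : sizeof(SysIBTl_cpu));
}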

> +
> +#define TOPOLOGY_NR_MAG  6
> +#define TOPOLOGY_NR_MAG6 0
> +#define TOPOLOGY_NR_MAG5 1
> +#define TOPOLOGY_NR_MAG4 2
> +#define TOPOLOGY_NR_MAG3 3
> +#define TOPOLOGY_NR_MAG2 4
> +#define TOPOLOGY_NR_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  res0[2];
> +    uint16_t length;
> +    uint8_t  mag[TOPOLOGY_NR_MAG];
> +    uint8_t  res1;
> +    uint8_t  mnest;
> +    uint32_t res2;
> +    SysIBTl_entry tle[0];

I think this should just be a uint64_t[] or uint64_t[0], whichever is QEMU style.
> +} SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>  /* MMU defines */
>  #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>  #define ASCE_SUBSPACE         0x200       /* subspace group control           */
Pierre Morel July 13, 2022, 2:59 p.m. UTC | #7
On 7/12/22 17:40, Janis Schoetterl-Glausch wrote:
> On 6/20/22 16:03, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highest level object in this implementation is the s390 book and
>> in this first implementation of CPU topology for S390 we have a single
>> book.
>> The book is built as a SYSBUS bridge during the CPU initialization.
>> Other objects, sockets and core will be built after the parsing
>> of the QEMU -smp argument.
>>
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> For the S390 CPU topology, thread and cores are merged into
>> topology cores and the number of topology cores is the multiplication
>> of cores by the numbers of threads.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>>   hw/s390x/meson.build            |   1 +
>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>   target/s390x/cpu.h              |  47 ++++
>>   5 files changed, 519 insertions(+)
>>   create mode 100644 hw/s390x/cpu-topology.c
>>   create mode 100644 include/hw/s390x/cpu-topology.h
>>
>> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
>> new file mode 100644
>> index 0000000000..0fd6f08084
>> --- /dev/null
>> +++ b/hw/s390x/cpu-topology.c
>> @@ -0,0 +1,391 @@
>> +/*
>> + * CPU Topology
>> + *
>> + * Copyright 2022 IBM Corp.
> 
> Should be Copyright IBM Corp. 2022, and maybe even have a year range.

OK

> 
>> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
>> +
>> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> +#include "hw/sysbus.h"
>> +#include "hw/s390x/cpu-topology.h"
>> +#include "hw/qdev-properties.h"
>> +#include "hw/boards.h"
>> +#include "qemu/typedefs.h"
>> +#include "target/s390x/cpu.h"
>> +#include "hw/s390x/s390-virtio-ccw.h"
>> +
>> +/*
>> + * s390_create_cores:
>> + * @ms: Machine state
>> + * @socket: the socket on which to create the core set
>> + * @origin: the origin offset of the first core of the set
>> + * @errp: Error pointer
>> + *
>> + * returns a pointer to the created S390TopologyCores structure
>> + *
>> + * On error: return NULL
>> + */
>> +static S390TopologyCores *s390_create_cores(MachineState *ms,
>> +                                            S390TopologySocket *socket,
>> +                                            int origin, Error **errp)
>> +{
>> +    DeviceState *dev;
>> +    S390TopologyCores *cores;
>> +
>> +    if (socket->bus->num_children >= ms->smp.cores * ms->smp.threads) {
>> +        error_setg(errp, "Unable to create more cores.");
>> +        return NULL;
>> +    }
> 
> Why/How can this happen?
> The "location" of the CPU is a function of core_id and the same CPU should not be added twice.
> If it's to enforce a limit on the smp arguments that should happen earlier in my opinion.
> If it's necessary, you could also make the message more verbose and add ", maximum number reached".

should not happen.

>> +
>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
>> +    qdev_realize_and_unref(dev, socket->bus, &error_fatal);
> 
> As a result of this, the order of cores in the socket bus is the creation order, correct?
> So newest first and not ordered by the origin (since we can hot plug CPUs), correct?

yes

>> +
>> +    cores = S390_TOPOLOGY_CORES(dev);
>> +    cores->origin = origin;
> 
> I must admit that I haven't fully grokked the qemu object model, yet, but I'd be more comfortable
> if you unref'ed cores after you set the origin.
> Does the socket bus own the object after you unref it? Does it then make sense to return cores
> after unref'ing it?

AFAIU yes the bus owns it.

> But then we don't support CPU unplug, so the object shouldn't just vanish.
> 
>> +    socket->cnt += 1;
> 
> cnt++ to be consistent with create_socket below.

OK

>> +
>> +    return cores;
>> +}
>> +
>> +/*
>> + * s390_create_socket:
>> + * @ms: Machine state
>> + * @book: the book on which to create the socket
>> + * @id: the socket id
>> + * @errp: Error pointer
>> + *
>> + * returns a pointer to the created S390TopologySocket structure
>> + *
>> + * On error: return NULL
>> + */
>> +static S390TopologySocket *s390_create_socket(MachineState *ms,
>> +                                              S390TopologyBook *book,
>> +                                              int id, Error **errp)
>> +{
> 
> Same questions/comments as above.

Same answer, should not happen.
I will remove the check.

> 
>> +    DeviceState *dev;
>> +    S390TopologySocket *socket;
>> +
>> +    if (book->bus->num_children >= ms->smp.sockets) {
>> +        error_setg(errp, "Unable to create more sockets.");
>> +        return NULL;
>> +    }
>> +
>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
>> +    qdev_realize_and_unref(dev, book->bus, &error_fatal);
>> +
>> +    socket = S390_TOPOLOGY_SOCKET(dev);
>> +    socket->socket_id = id;
>> +    book->cnt++;
>> +
>> +    return socket;
>> +}
>> +
>> +/*
>> + * s390_get_cores:
>> + * @ms: Machine state
>> + * @socket: the socket to search into
>> + * @origin: the origin specified for the S390TopologyCores
>> + * @errp: Error pointer
>> + *
>> + * returns a pointer to a S390TopologyCores structure within a socket having
>> + * the specified origin.
>> + * First search if the socket is already containing the S390TopologyCores
>> + * structure and if not create one with this origin.
>> + */
>> +static S390TopologyCores *s390_get_cores(MachineState *ms,
>> +                                         S390TopologySocket *socket,
>> +                                         int origin, Error **errp)
>> +{
>> +    S390TopologyCores *cores;
>> +    BusChild *kid;
>> +
>> +    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
>> +        cores = S390_TOPOLOGY_CORES(kid->child);
>> +        if (cores->origin == origin) {
>> +            return cores;
>> +        }
>> +    }
>> +    return s390_create_cores(ms, socket, origin, errp);
> 
> I think calling create here is unintuative.
> You only use get_cores once when creating a new cpu, I think doing
> 
>      cores = s390_get_cores(ms, socket, origin, errp);
>      if (!cores) {
>          cores = s390_create_cores(...);
>      ]
>      if (!cores) {
>          return false;
>      }
> 
> is more straight forward and readable.

I will keep it this way, as the creation cannot fail.
It was like this in the first series; I made this change later, and it
was not a good idea.

>> +}
>> +
>> +/*
>> + * s390_get_socket:
>> + * @ms: Machine state
>> + * @book: The book to search into
>> + * @socket_id: the identifier of the socket to search for
>> + * @errp: Error pointer
>> + *
>> + * returns a pointer to a S390TopologySocket structure within a book having
>> + * the specified socket_id.
>> + * First search if the book is already containing the S390TopologySocket
>> + * structure and if not create one with this socket_id.
>> + */
>> +static S390TopologySocket *s390_get_socket(MachineState *ms,
>> +                                           S390TopologyBook *book,
>> +                                           int socket_id, Error **errp)
>> +{
>> +    S390TopologySocket *socket;
>> +    BusChild *kid;
>> +
>> +    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
>> +        socket = S390_TOPOLOGY_SOCKET(kid->child);
>> +        if (socket->socket_id == socket_id) {
>> +            return socket;
>> +        }
>> +    }
>> +    return s390_create_socket(ms, book, socket_id, errp);
> 
> As above.
> 
>> +}
>> +
>> +/*
>> + * s390_topology_new_cpu:
>> + * @core_id: the core ID is machine wide
>> + *
>> + * We have a single book returned by s390_get_topology(),
>> + * then we build the hierarchy on demand.
>> + * Note that we do not destroy the hierarchy on error creating
>> + * an entry in the topology, we just keep it empty.
>> + * We do not need to worry about not finding a topology level
>> + * entry this would have been caught during smp parsing.
>> + */
>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>> +{
>> +    S390TopologyBook *book;
>> +    S390TopologySocket *socket;
>> +    S390TopologyCores *cores;
>> +    int nb_cores_per_socket;
> 
> num_cores_per_socket instead?
> 
>> +    int origin, bit;
>> +
>> +    book = s390_get_topology();
>> +
>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
> 
> We don't support the multithreading facility, do we?
> So, I think we should assert smp.threads == 1 somewhere.
> In any case I think the correct expression would round the threads up to the next power of 2,
> because the core_id has the thread id in the lower bits, but threads per core doesn't need to be
> a power of 2 according to the architecture.

That is right.
I will add that.


> 
>> +
>> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, errp);
>> +    if (!socket) {
>> +        return false;
>> +    }
>> +
>> +    /*
>> +     * At the core level, each CPU is represented by a bit in a 64bit
>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>> +     * The firmware assume that all CPU in the core description have the same
>> +     * type, polarization and are all dedicated or shared.
>> +     * In the case a socket contains CPU with different type, polarization
>> +     * or dedication then they will be defined in different CPU containers.
>> +     * Currently we assume all CPU are identical and the only reason to have
>> +     * several S390TopologyCores inside a socket is to have more than 64 CPUs
>> +     * in that case the origin field, representing the offset of the first CPU
>> +     * in the CPU container allows to represent up to the maximal number of
>> +     * CPU inside several CPU containers inside the socket container.
>> +     */
>> +    origin = 64 * (core_id / 64);
>> +
>> +    cores = s390_get_cores(ms, socket, origin, errp);
>> +    if (!cores) {
>> +        return false;
>> +    }
>> +
>> +    bit = 63 - (core_id - origin);
>> +    set_bit(bit, &cores->mask);
>> +    cores->origin = origin;
> 
> This is redundant, origin is already set.

right

> Also I think you should generally pass the core_id and not the origin.
> Then on construction you can also set the bit.

OK

> 
>> +
>> +    return true;
>> +}
>> +
>> +/*
>> + * Setting the first topology: 1 book, 1 socket
>> + * This is enough for 64 cores if the topology is flat (single socket)
>> + */
>> +void s390_topology_setup(MachineState *ms)
>> +{
>> +    DeviceState *dev;
>> +
>> +    /* Create BOOK bridge device */
>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
>> +    object_property_add_child(qdev_get_machine(),
>> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
> 
> Why add it to the machine instead of directly using a static?

In my opinion, it is a characteristic of the machine.

> So it's visible to the user via info qtree or something?

It is already visible to the user on info qtree.

> Would that even be the appropriate location to show that?

That is a very good question, and I really appreciate it if we discuss
the design before diving into details.

The idea is to have the architecture details on the qtree as objects, so
we can plug new drawers/books/sockets/cores and, in the future when the
infrastructure allows it, unplug them.

There is an info numa (info cpus does not give a lot of info) to give
information on nodes, but AFAIU a node is more a theoretical construct
that can be used above the virtual architecture, sockets/cores, to
specify characteristics like distance and associated memory.

As I understand it, it can be used above sockets, and for us above books
or drawers too, like in:

-numa cpu,node-id=0,socket-id=0

All cores in socket 0 belong to node 0

or
-numa cpu,node-id=1,drawer-id=1

all cores from all sockets of drawer 1 belong to node 1


As there is no info socket, I think that for now we do not need an info
book/drawer; we have everything in the qtree.


> 
>> +    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
>> +}
>> +
>> +S390TopologyBook *s390_get_topology(void)
>> +{
>> +    static S390TopologyBook *book;
>> +
>> +    if (!book) {
>> +        book = S390_TOPOLOGY_BOOK(
>> +            object_resolve_path(TYPE_S390_TOPOLOGY_BOOK, NULL));
>> +        assert(book != NULL);
>> +    }
>> +
>> +    return book;
>> +}
>> +
>> +/* --- CORES Definitions --- */
>> +
>> +static Property s390_topology_cores_properties[] = {
>> +    DEFINE_PROP_BOOL("dedicated", S390TopologyCores, dedicated, false),
>> +    DEFINE_PROP_UINT8("polarity", S390TopologyCores, polarity,
>> +                      S390_TOPOLOGY_POLARITY_H),
>> +    DEFINE_PROP_UINT8("cputype", S390TopologyCores, cputype,
>> +                      S390_TOPOLOGY_CPU_TYPE),
>> +    DEFINE_PROP_UINT16("origin", S390TopologyCores, origin, 0),
>> +    DEFINE_PROP_UINT64("mask", S390TopologyCores, mask, 0),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void cpu_cores_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>> +
>> +    device_class_set_props(dc, s390_topology_cores_properties);
>> +    hc->unplug = qdev_simple_device_unplug_cb;
>> +    dc->bus_type = TYPE_S390_TOPOLOGY_SOCKET_BUS;
>> +    dc->desc = "topology cpu entry";
>> +}
>> +
>> +static const TypeInfo cpu_cores_info = {
>> +    .name          = TYPE_S390_TOPOLOGY_CORES,
>> +    .parent        = TYPE_DEVICE,
>> +    .instance_size = sizeof(S390TopologyCores),
>> +    .class_init    = cpu_cores_class_init,
>> +    .interfaces = (InterfaceInfo[]) {
>> +        { TYPE_HOTPLUG_HANDLER },
> 
> Why implement the hotplug interface? That is not actually supported, is it?

Exactly, no need.

>> +        { }
>> +    }
>> +};
>> +
>> +static char *socket_bus_get_dev_path(DeviceState *dev)
>> +{
>> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
>> +    DeviceState *book = dev->parent_bus->parent;
>> +    char *id = qdev_get_dev_path(book);
>> +    char *ret;
>> +
>> +    if (id) {
>> +        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
>> +        g_free(id);
>> +    } else {
>> +        ret = g_strdup_printf("_:%02d", socket->socket_id);
> 
> How can this case occur? Sockets get attached to the book bus immediately after creation, correct?

yes

> 
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static void socket_bus_class_init(ObjectClass *oc, void *data)
>> +{
>> +    BusClass *k = BUS_CLASS(oc);
>> +
>> +    k->get_dev_path = socket_bus_get_dev_path;
>> +    k->max_dev = S390_MAX_SOCKETS;
> 
> This is the bus the cores are attached to, correct?
> Is this constant badly named, or should this be MAX_CORES (which doesn't exist)?
> How does this limit get enforced?
> Why is there a limit in the first place? I don't see one defined by STSI, other than having to fit in a u8.

I will remove this, it was intended to be an architecture dependent
value. For example, it could be 3 for Z16 or 5 for Z17, but I am not sure
it is worth it.

>> +}
>> +
>> +static const TypeInfo socket_bus_info = {
>> +    .name = TYPE_S390_TOPOLOGY_SOCKET_BUS,
>> +    .parent = TYPE_BUS,
>> +    .instance_size = 0,
> 
> After a bit of grepping it seems to me that omitting that field is more common than setting it to 0.

right

> 
>> +    .class_init = socket_bus_class_init,
>> +};
>> +
>> +static void s390_socket_device_realize(DeviceState *dev, Error **errp)
>> +{
>> +    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
>> +    BusState *bus;
>> +
>> +    bus = qbus_new(TYPE_S390_TOPOLOGY_SOCKET_BUS, dev,
>> +                   TYPE_S390_TOPOLOGY_SOCKET_BUS);
>> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
>> +    socket->bus = bus;
>> +}
>> +
>> +static void socket_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>> +
>> +    hc->unplug = qdev_simple_device_unplug_cb;
>> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>> +    dc->bus_type = TYPE_S390_TOPOLOGY_BOOK_BUS;
>> +    dc->realize = s390_socket_device_realize;
>> +    dc->desc = "topology socket";
>> +}
>> +
>> +static const TypeInfo socket_info = {
>> +    .name          = TYPE_S390_TOPOLOGY_SOCKET,
>> +    .parent        = TYPE_DEVICE,
>> +    .instance_size = sizeof(S390TopologySocket),
>> +    .class_init    = socket_class_init,
>> +    .interfaces = (InterfaceInfo[]) {
>> +        { TYPE_HOTPLUG_HANDLER },
>> +        { }
>> +    }
>> +};
>> +
>> +static char *book_bus_get_dev_path(DeviceState *dev)
>> +{
>> +    return g_strdup("00");
>> +}
>> +
>> +static void book_bus_class_init(ObjectClass *oc, void *data)
>> +{
>> +    BusClass *k = BUS_CLASS(oc);
>> +
>> +    k->get_dev_path = book_bus_get_dev_path;
>> +    k->max_dev = S390_MAX_BOOKS;
> 
> Same question as for socket_bus_class_init here.
> 
>> +}
>> +
>> +static const TypeInfo book_bus_info = {
>> +    .name = TYPE_S390_TOPOLOGY_BOOK_BUS,
>> +    .parent = TYPE_BUS,
>> +    .instance_size = 0,
>> +    .class_init = book_bus_class_init,
>> +};
>> +
>> +static void s390_book_device_realize(DeviceState *dev, Error **errp)
>> +{
>> +    S390TopologyBook *book = S390_TOPOLOGY_BOOK(dev);
>> +    BusState *bus;
>> +
>> +    bus = qbus_new(TYPE_S390_TOPOLOGY_BOOK_BUS, dev,
>> +                   TYPE_S390_TOPOLOGY_BOOK_BUS);
>> +    qbus_set_hotplug_handler(bus, OBJECT(dev));
>> +    book->bus = bus;
>> +}
>> +
>> +static void book_class_init(ObjectClass *oc, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(oc);
>> +    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
>> +
>> +    hc->unplug = qdev_simple_device_unplug_cb;
>> +    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>> +    dc->realize = s390_book_device_realize;
>> +    dc->desc = "topology book";
>> +}
>> +
>> +static const TypeInfo book_info = {
>> +    .name          = TYPE_S390_TOPOLOGY_BOOK,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(S390TopologyBook),
>> +    .class_init    = book_class_init,
>> +    .interfaces = (InterfaceInfo[]) {
>> +        { TYPE_HOTPLUG_HANDLER },
>> +        { }
>> +    }
>> +};
>> +
>> +static void topology_register(void)
>> +{
>> +    type_register_static(&cpu_cores_info);
>> +    type_register_static(&socket_bus_info);
>> +    type_register_static(&socket_info);
>> +    type_register_static(&book_bus_info);
>> +    type_register_static(&book_info);
>> +}
>> +
>> +type_init(topology_register);
>> diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
>> index feefe0717e..3592fa952b 100644
>> --- a/hw/s390x/meson.build
>> +++ b/hw/s390x/meson.build
>> @@ -2,6 +2,7 @@ s390x_ss = ss.source_set()
>>   s390x_ss.add(files(
>>     'ap-bridge.c',
>>     'ap-device.c',
>> +  'cpu-topology.c',
>>     'ccw-device.c',
>>     'css-bridge.c',
>>     'css.c',
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index cc3097bfee..a586875b24 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -43,6 +43,7 @@
>>   #include "sysemu/sysemu.h"
>>   #include "hw/s390x/pv.h"
>>   #include "migration/blocker.h"
>> +#include "hw/s390x/cpu-topology.h"
>>   
>>   static Error *pv_mig_blocker;
>>   
>> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>>       /* initialize possible_cpus */
>>       mc->possible_cpu_arch_ids(machine);
>>   
>> +    s390_topology_setup(machine);
>>       for (i = 0; i < machine->smp.cpus; i++) {
>>           s390x_new_cpu(machine->cpu_type, i, &error_fatal);
>>       }
>> @@ -306,6 +308,10 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev,
>>       g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
>>       ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
>>   
>> +    if (!s390_topology_new_cpu(ms, cpu->env.core_id, errp)) {
>> +        return;
>> +    }
>> +
>>       if (dev->hotplugged) {
>>           raise_irq_cpu_hotplug();
>>       }
>> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
>> new file mode 100644
>> index 0000000000..beec61706c
>> --- /dev/null
>> +++ b/include/hw/s390x/cpu-topology.h
>> @@ -0,0 +1,74 @@
>> +/*
>> + * CPU Topology
>> + *
>> + * Copyright 2022 IBM Corp.
> 
> Same issue as with .c copyright notice.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or (at
>> + * your option) any later version. See the COPYING file in the top-level
>> + * directory.
>> + */
>> +#ifndef HW_S390X_CPU_TOPOLOGY_H
>> +#define HW_S390X_CPU_TOPOLOGY_H
>> +
>> +#include "hw/qdev-core.h"
>> +#include "qom/object.h"
>> +
>> +#define S390_TOPOLOGY_CPU_TYPE    0x03
> 
> This is the IFL type, right? If so the name should reflect it.

OK

>> +
>> +#define S390_TOPOLOGY_POLARITY_H  0x00
>> +#define S390_TOPOLOGY_POLARITY_VL 0x01
>> +#define S390_TOPOLOGY_POLARITY_VM 0x02
>> +#define S390_TOPOLOGY_POLARITY_VH 0x03
> 
> Why not use an enum?

these are bits inside a bitfield, not a count, so I prefer a direct 
definition.
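
They are written as-is into the two-bit polarity field of the CPU TLE,
roughly like this (only a sketch based on the SysIBTl_cpu and
S390TopologyCores layouts of this patch; the actual filling happens in the
STSI patch):

    tle->polarity  = cores->polarity;   /* one of S390_TOPOLOGY_POLARITY_* */
    tle->dedicated = cores->dedicated;
    tle->type      = cores->cputype;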

>> +
>> +#define TYPE_S390_TOPOLOGY_CORES "topology cores"
> 
> Seems to me that using a - instead of a space is the usual way of doing things.

right

>> +    /*
>> +     * Each CPU inside a socket will be represented by a bit in a 64bit
>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>> +     * All CPU inside a mask share the same dedicated, polarity and
>> +     * cputype values.
>> +     * The origin is the offset of the first CPU in a mask.
>> +     */
>> +struct S390TopologyCores {
>> +    DeviceState parent_obj;
>> +    int id;
>> +    bool dedicated;
>> +    uint8_t polarity;
>> +    uint8_t cputype;
> 
> Why not snake_case for cpu type?

I do not understand what you mean.

> 
>> +    uint16_t origin;
>> +    uint64_t mask;
>> +    int cnt;
> 
> num_cores instead ?

I will remove this, it is unused.

> 
>> +};
>> +typedef struct S390TopologyCores S390TopologyCores;
>> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyCores, S390_TOPOLOGY_CORES)
>> +
>> +#define TYPE_S390_TOPOLOGY_SOCKET "topology socket"
>> +#define TYPE_S390_TOPOLOGY_SOCKET_BUS "socket-bus"
>> +struct S390TopologySocket {
>> +    DeviceState parent_obj;
>> +    BusState *bus;
>> +    int socket_id;
>> +    int cnt;
>> +};
>> +typedef struct S390TopologySocket S390TopologySocket;
>> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologySocket, S390_TOPOLOGY_SOCKET)
>> +#define S390_MAX_SOCKETS 4
>> +
>> +#define TYPE_S390_TOPOLOGY_BOOK "topology book"
>> +#define TYPE_S390_TOPOLOGY_BOOK_BUS "book-bus"
>> +struct S390TopologyBook {
>> +    SysBusDevice parent_obj;
>> +    BusState *bus;
>> +    int book_id;
>> +    int cnt;
>> +};
>> +typedef struct S390TopologyBook S390TopologyBook;
>> +OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyBook, S390_TOPOLOGY_BOOK)
>> +#define S390_MAX_BOOKS 1
>> +
>> +S390TopologyBook *s390_init_topology(void);
>> +
>> +S390TopologyBook *s390_get_topology(void);
>> +void s390_topology_setup(MachineState *ms);
>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp);
>> +
>> +#endif
>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>> index 7d6d01325b..216adfde26 100644
>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h
> 
> I think these definitions should be moved to the STSI patch since they're not used in this one.

right

> 
>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>   } SysIB;
>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>   
>> +/* CPU type Topology List Entry */
>> +typedef struct SysIBTl_cpu {
>> +        uint8_t nl;
>> +        uint8_t reserved0[3];
>> +        uint8_t reserved1:5;
>> +        uint8_t dedicated:1;
>> +        uint8_t polarity:2;
>> +        uint8_t type;
>> +        uint16_t origin;
>> +        uint64_t mask;
>> +} SysIBTl_cpu;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>> +
>> +/* Container type Topology List Entry */
>> +typedef struct SysIBTl_container {
>> +        uint8_t nl;
>> +        uint8_t reserved[6];
>> +        uint8_t id;
>> +} QEMU_PACKED SysIBTl_container;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>> +
>> +/* Generic Topology List Entry */
>> +typedef union SysIBTl_entry {
>> +        uint8_t nl;
>> +        SysIBTl_container container;
>> +        SysIBTl_cpu cpu;
>> +} SysIBTl_entry;
> 
> I don't like this union, it's only used in SysIB_151x below and that's misleading,
> because the entries are packed without padding, but the union members have different
> sizes.

The entries have different sizes, 64 bits and 128 bits.
I do not understand why they should be padded.

However, the union here is useless; I will remove it.

> 
>> +
>> +#define TOPOLOGY_NR_MAG  6
>> +#define TOPOLOGY_NR_MAG6 0
>> +#define TOPOLOGY_NR_MAG5 1
>> +#define TOPOLOGY_NR_MAG4 2
>> +#define TOPOLOGY_NR_MAG3 3
>> +#define TOPOLOGY_NR_MAG2 4
>> +#define TOPOLOGY_NR_MAG1 5
>> +/* Configuration topology */
>> +typedef struct SysIB_151x {
>> +    uint8_t  res0[2];
>> +    uint16_t length;
>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>> +    uint8_t  res1;
>> +    uint8_t  mnest;
>> +    uint32_t res2;
>> +    SysIBTl_entry tle[0];
> 
> I think this should just be a uint64_t[] or uint64_t[0], whichever is QEMU style.

ok
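
I.e. the struct would then look something like this (just a sketch, the
field layout is the one from this patch, only tle changes):

    typedef struct SysIB_151x {
        uint8_t  res0[2];
        uint16_t length;
        uint8_t  mag[TOPOLOGY_NR_MAG];
        uint8_t  res1;
        uint8_t  mnest;
        uint32_t res2;
        uint64_t tle[];
    } SysIB_151x;
    QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);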

>> +} SysIB_151x;
>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>> +
>>   /* MMU defines */
>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
>
Janis Schoetterl-Glausch July 14, 2022, 10:38 a.m. UTC | #8
On 7/13/22 16:59, Pierre Morel wrote:
> 
> 
> On 7/12/22 17:40, Janis Schoetterl-Glausch wrote:
>> On 6/20/22 16:03, Pierre Morel wrote:
>>> We use new objects to have a dynamic administration of the CPU topology.
>>> The highest level object in this implementation is the s390 book and
>>> in this first implementation of CPU topology for S390 we have a single
>>> book.
>>> The book is built as a SYSBUS bridge during the CPU initialization.
>>> Other objects, sockets and core will be built after the parsing
>>> of the QEMU -smp argument.
>>>
>>> Every object under this single book will be build dynamically
>>> immediately after a CPU has be realized if it is needed.
>>> The CPU will fill the sockets once after the other, according to the
>>> number of core per socket defined during the smp parsing.
>>>
>>> Each CPU inside a socket will be represented by a bit in a 64bit
>>> unsigned long. Set on plug and clear on unplug of a CPU.
>>>
>>> For the S390 CPU topology, thread and cores are merged into
>>> topology cores and the number of topology cores is the multiplication
>>> of cores by the numbers of threads.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
>>>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>>>   hw/s390x/meson.build            |   1 +
>>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>>   target/s390x/cpu.h              |  47 ++++
>>>   5 files changed, 519 insertions(+)
>>>   create mode 100644 hw/s390x/cpu-topology.c
>>>   create mode 100644 include/hw/s390x/cpu-topology.h
>>>

[...]

>>> +}
>>> +
>>> +/*
>>> + * s390_topology_new_cpu:
>>> + * @core_id: the core ID is machine wide
>>> + *
>>> + * We have a single book returned by s390_get_topology(),
>>> + * then we build the hierarchy on demand.
>>> + * Note that we do not destroy the hierarchy on error creating
>>> + * an entry in the topology, we just keep it empty.
>>> + * We do not need to worry about not finding a topology level
>>> + * entry this would have been caught during smp parsing.
>>> + */
>>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>>> +{
>>> +    S390TopologyBook *book;
>>> +    S390TopologySocket *socket;
>>> +    S390TopologyCores *cores;
>>> +    int nb_cores_per_socket;
>>
>> num_cores_per_socket instead?
>>
>>> +    int origin, bit;
>>> +
>>> +    book = s390_get_topology();
>>> +
>>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>>
>> We don't support the multithreading facility, do we?
>> So, I think we should assert smp.threads == 1 somewhere.
>> In any case I think the correct expression would round the threads up to the next power of 2,
>> because the core_id has the thread id in the lower bits, but threads per core doesn't need to be
>> a power of 2 according to the architecture.
> 
> That is right.
> I will add that.

Add the assert?
It should probably be somewhere else.
And you can set thread > 1 today, so we'd need to handle that. (increase the number of cpus instead and print a warning?)

[...]

>>> +
>>> +/*
>>> + * Setting the first topology: 1 book, 1 socket
>>> + * This is enough for 64 cores if the topology is flat (single socket)
>>> + */
>>> +void s390_topology_setup(MachineState *ms)
>>> +{
>>> +    DeviceState *dev;
>>> +
>>> +    /* Create BOOK bridge device */
>>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
>>> +    object_property_add_child(qdev_get_machine(),
>>> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
>>
>> Why add it to the machine instead of directly using a static?
> 
> In my opinion it is a characteristic of the machine.
> 
>> So it's visible to the user via info qtree or something?
> 
> It is already visible to the user on info qtree.
> 
>> Would that even be the appropriate location to show that?
> 
> That is a very good question and I really appreciate that we discuss the design before diving into details.
> 
> The idea is to have the architecture details present in the qtree as objects, so we can plug new drawers/books/sockets/cores and, in the future, when the infrastructure allows it, unplug them.

Would it not be more accurate to say that we plug in new cpus only?
Since you need to specify the topology up front with -smp and it cannot change after.
So that all is static, books/sockets might be completely unpopulated, but they still exist in a way.
As far as I understand, STSI only allows for cpus to change, nothing above it.
> 
> There is an info numa command (info cpus does not give a lot of info) to give information on nodes, but AFAIU a node is more of a theoretical construct that can be used above the virtual architecture (sockets/cores) to specify characteristics like distance and associated memory.

https://qemu.readthedocs.io/en/latest/interop/qemu-qmp-ref.html#qapidoc-2391
shows that the relevant information can be queried via qmp.
When I tried it on s390x it only showed the core_id, but we should be able to add the rest.


Am I correct in my understanding, that there are two reasons to have the hierarchy objects:
1. Caching the topology instead of computing it when STSI is called
2. So they show up in info qtree

?

> 
> As I understand it, it can be used above sockets, and for us above books or drawers too, like in:
> 
> -numa cpu,node-id=0,socket-id=0
> 
> All cores in socket 0 belong to node 0
> 
> or
> -numa cpu,node-id=1,drawer-id=1
> 
> all cores from all sockets of drawer 1 belong to node 1
> 
> 
> As there is no info socket, I think that for now we do not need an info book/drawer; we have everything in the qtree.
> 
> 
>>

[...]

> 
>>> +    /*
>>> +     * Each CPU inside a socket will be represented by a bit in a 64bit
>>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>>> +     * All CPU inside a mask share the same dedicated, polarity and
>>> +     * cputype values.
>>> +     * The origin is the offset of the first CPU in a mask.
>>> +     */
>>> +struct S390TopologyCores {
>>> +    DeviceState parent_obj;
>>> +    int id;
>>> +    bool dedicated;
>>> +    uint8_t polarity;
>>> +    uint8_t cputype;
>>
>> Why not snake_case for cpu type?
> 
> I do not understand what you mean.

I'm suggesting s/cputype/cpu_type/
> 
>>
>>> +    uint16_t origin;
>>> +    uint64_t mask;
>>> +    int cnt;
>>
>> num_cores instead ?
> 
> I will remove this, it is unused.
> 
>>

[...]

>>> @@ -565,6 +565,53 @@ typedef union SysIB {
>>>   } SysIB;
>>>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>>>   +/* CPU type Topology List Entry */
>>> +typedef struct SysIBTl_cpu {
>>> +        uint8_t nl;
>>> +        uint8_t reserved0[3];
>>> +        uint8_t reserved1:5;
>>> +        uint8_t dedicated:1;
>>> +        uint8_t polarity:2;
>>> +        uint8_t type;
>>> +        uint16_t origin;
>>> +        uint64_t mask;
>>> +} SysIBTl_cpu;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>>> +
>>> +/* Container type Topology List Entry */
>>> +typedef struct SysIBTl_container {
>>> +        uint8_t nl;
>>> +        uint8_t reserved[6];
>>> +        uint8_t id;
>>> +} QEMU_PACKED SysIBTl_container;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
>>> +
>>> +/* Generic Topology List Entry */
>>> +typedef union SysIBTl_entry {
>>> +        uint8_t nl;
>>> +        SysIBTl_container container;
>>> +        SysIBTl_cpu cpu;
>>> +} SysIBTl_entry;
>>
>> I don't like this union, it's only used in SysIB_151x below and that's misleading,
>> because the entries are packed without padding, but the union members have different
>> sizes.
> 
> The entries have different sizes, 64 bits and 128 bits.
> I do not understand why they should be padded.

I was saying that in the SYSIB there is no padding, but the size of the union is 16,
so two container entries in the array would have padding, which is misleading.
There is no actual problem, since the array is not actually used as such.
> 
> However, the union here is useless; I will remove it.
> 
>>
>>> +
>>> +#define TOPOLOGY_NR_MAG  6
>>> +#define TOPOLOGY_NR_MAG6 0
>>> +#define TOPOLOGY_NR_MAG5 1
>>> +#define TOPOLOGY_NR_MAG4 2
>>> +#define TOPOLOGY_NR_MAG3 3
>>> +#define TOPOLOGY_NR_MAG2 4
>>> +#define TOPOLOGY_NR_MAG1 5
>>> +/* Configuration topology */
>>> +typedef struct SysIB_151x {
>>> +    uint8_t  res0[2];
>>> +    uint16_t length;
>>> +    uint8_t  mag[TOPOLOGY_NR_MAG];
>>> +    uint8_t  res1;
>>> +    uint8_t  mnest;
>>> +    uint32_t res2;
>>> +    SysIBTl_entry tle[0];
>>
>> I think this should just be a uint64_t[] or uint64_t[0], whichever is QEMU style.
> 
> ok
> 
>>> +} SysIB_151x;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
>>> +
>>>   /* MMU defines */
>>>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>>>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
>>
>
Pierre Morel July 14, 2022, 11:25 a.m. UTC | #9
On 7/14/22 12:38, Janis Schoetterl-Glausch wrote:
> On 7/13/22 16:59, Pierre Morel wrote:
>>
>>
>> On 7/12/22 17:40, Janis Schoetterl-Glausch wrote:
>>> On 6/20/22 16:03, Pierre Morel wrote:

> 
> [...]
> 
>>>> +}
>>>> +
>>>> +/*
>>>> + * s390_topology_new_cpu:
>>>> + * @core_id: the core ID is machine wide
>>>> + *
>>>> + * We have a single book returned by s390_get_topology(),
>>>> + * then we build the hierarchy on demand.
>>>> + * Note that we do not destroy the hierarchy on error creating
>>>> + * an entry in the topology, we just keep it empty.
>>>> + * We do not need to worry about not finding a topology level
>>>> + * entry this would have been caught during smp parsing.
>>>> + */
>>>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>>>> +{
>>>> +    S390TopologyBook *book;
>>>> +    S390TopologySocket *socket;
>>>> +    S390TopologyCores *cores;
>>>> +    int nb_cores_per_socket;
>>>
>>> num_cores_per_socket instead?
>>>
>>>> +    int origin, bit;
>>>> +
>>>> +    book = s390_get_topology();
>>>> +
>>>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>>>
>>> We don't support the multithreading facility, do we?
>>> So, I think we should assert smp.threads == 1 somewhere.
>>> In any case I think the correct expression would round the threads up to the next power of 2,
>>> because the core_id has the thread id in the lower bits, but threads per core doesn't need to be
>>> a power of 2 according to the architecture.
>>
>> That is right.
>> I will add that.
> 
> Add the assert?
> It should probably be somewhere else.

That is sure.
I thought about putting a fatal error report during the initialization in
s390_topology_setup().
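
Roughly like this (only a sketch, wording of the message to be decided):

    void s390_topology_setup(MachineState *ms)
    {
        DeviceState *dev;

        /* SMT is not exposed to the guest for now, refuse threads > 1 */
        if (ms->smp.threads != 1) {
            error_report("CPU topology: SMT is not supported, threads must be 1");
            exit(1);
        }

        /* Create BOOK bridge device */
        dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
        object_property_add_child(qdev_get_machine(),
                                  TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
        sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
    }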

> And you can set thread > 1 today, so we'd need to handle that. (increase the number of cpus instead and print a warning?)
> 
> [...]

this would introduce arch dependencies in the hw/core/
I think that the error report for Z is enough.

So once we support multithreading in the guest, we can adjust it more
easily without involving the common code.

Or we can introduce a thread_supported in SMPCompatProps, which would be 
good.
I would prefer to propose this outside of the series and suppress the 
fatal error once it is adopted.

> 
>>>> +
>>>> +/*
>>>> + * Setting the first topology: 1 book, 1 socket
>>>> + * This is enough for 64 cores if the topology is flat (single socket)
>>>> + */
>>>> +void s390_topology_setup(MachineState *ms)
>>>> +{
>>>> +    DeviceState *dev;
>>>> +
>>>> +    /* Create BOOK bridge device */
>>>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
>>>> +    object_property_add_child(qdev_get_machine(),
>>>> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
>>>
>>> Why add it to the machine instead of directly using a static?
>>
>> In my opinion it is a characteristic of the machine.
>>
>>> So it's visible to the user via info qtree or something?
>>
>> It is already visible to the user on info qtree.
>>
>>> Would that even be the appropriate location to show that?
>>
>> That is a very good question and I really appreciate that we discuss the design before diving into details.
>>
>> The idea is to have the architecture details present in the qtree as objects, so we can plug new drawers/books/sockets/cores and, in the future, when the infrastructure allows it, unplug them.
> 
> Would it not be more accurate to say that we plug in new cpus only?
> Since you need to specify the topology up front with -smp and it cannot change after.

smp specifies the maximum we can have.
I thought we could add elements dynamically inside this maximum set.

> So that all is static, books/sockets might be completely unpopulated, but they still exist in a way.
> As far as I understand, STSI only allows for cpus to change, nothing above it.

I thought we want to plug new books or drawers but I may be wrong.

>>
>> There is an info numa command (info cpus does not give a lot of info) to give information on nodes, but AFAIU a node is more of a theoretical construct that can be used above the virtual architecture (sockets/cores) to specify characteristics like distance and associated memory.
> 
> https://qemu.readthedocs.io/en/latest/interop/qemu-qmp-ref.html#qapidoc-2391
> shows that the relevant information can be queried via qmp.
> When I tried it on s390x it only showed the core_id, but we should be able to add the rest.

yes, sure.

> 
> 
> Am I correct in my understanding, that there are two reasons to have the hierarchy objects:
> 1. Caching the topology instead of computing it when STSI is called
> 2. So they show up in info qtree
> 
> ?

and have the possibility to add the objects dynamically. yes


> [...]
> 
>>
>>>> +    /*
>>>> +     * Each CPU inside a socket will be represented by a bit in a 64bit
>>>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>>>> +     * All CPU inside a mask share the same dedicated, polarity and
>>>> +     * cputype values.
>>>> +     * The origin is the offset of the first CPU in a mask.
>>>> +     */
>>>> +struct S390TopologyCores {
>>>> +    DeviceState parent_obj;
>>>> +    int id;
>>>> +    bool dedicated;
>>>> +    uint8_t polarity;
>>>> +    uint8_t cputype;
>>>
>>> Why not snake_case for cpu type?
>>
>> I do not understand what you mean.
> 
> I'm suggesting s/cputype/cpu_type/

ok


Thanks,

regards,
Pierre
Janis Schoetterl-Glausch July 14, 2022, 12:50 p.m. UTC | #10
On 7/14/22 13:25, Pierre Morel wrote:

[...]

> 
> That is sure.
> I thought about putting a fatal error report during the initialization in s390_topology_setup().
> 
>> And you can set thread > 1 today, so we'd need to handle that. (increase the number of cpus instead and print a warning?)
>>
>> [...]
> 
> this would introduce arch dependencies in the hw/core/
> I think that the error report for Z is enough.
> 
> So once we support multithreading in the guest, we can adjust it more easily without involving the common code.
> 
> Or we can introduce a thread_supported in SMPCompatProps, which would be good.
> I would prefer to propose this outside of the series and suppress the fatal error once it is adopted.
> 

Yeah, it could be a separate series, but then the question remains what you do in this one, that is,
whether you change the code so it would be correct if multithreading were supported.
>>
>>>>> +
>>>>> +/*
>>>>> + * Setting the first topology: 1 book, 1 socket
>>>>> + * This is enough for 64 cores if the topology is flat (single socket)
>>>>> + */
>>>>> +void s390_topology_setup(MachineState *ms)
>>>>> +{
>>>>> +    DeviceState *dev;
>>>>> +
>>>>> +    /* Create BOOK bridge device */
>>>>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
>>>>> +    object_property_add_child(qdev_get_machine(),
>>>>> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
>>>>
>>>> Why add it to the machine instead of directly using a static?
>>>
>>> In my opinion it is a characteristic of the machine.
>>>
>>>> So it's visible to the user via info qtree or something?
>>>
>>> It is already visible to the user on info qtree.
>>>
>>>> Would that even be the appropriate location to show that?
>>>
>>> That is a very good question and I really appreciate that we discuss the design before diving into details.
>>>
>>> The idea is to have the architecture details present in the qtree as objects, so we can plug new drawers/books/sockets/cores and, in the future, when the infrastructure allows it, unplug them.
>>
>> Would it not be more accurate to say that we plug in new cpus only?
>> Since you need to specify the topology up front with -smp and it cannot change after.
> 
> smp specifies the maximum we can have.
> I thought we could add elements dynamically inside this maximum set.
> 
>> So that all is static, books/sockets might be completely unpopulated, but they still exist in a way.
>> As far as I understand, STSI only allows for cpus to change, nothing above it.
> 
> I thought we want to plug new books or drawers but I may be wrong.

So you want to be able to plug in, for example, a socket without any cpus in it?
I'm not seeing anything in the description of STSI that forbids having empty containers
or containers with a cpu entry without any cpus. But I don't know why that would be useful.
And if you don't want empty containers, then the container will just show up when plugging in the cpu.
> 
>>>
>>> There is an info numa command (info cpus does not give a lot of info) to give information on nodes, but AFAIU a node is more of a theoretical construct that can be used above the virtual architecture (sockets/cores) to specify characteristics like distance and associated memory.
>>
>> https://qemu.readthedocs.io/en/latest/interop/qemu-qmp-ref.html#qapidoc-2391
>> shows that the relevant information can be queried via qmp.
>> When I tried it on s390x it only showed the core_id, but we should be able to add the rest.
> 
> yes, sure.
>>
>>
>> Am I correct in my understanding, that there are two reasons to have the hierarchy objects:
>> 1. Caching the topology instead of computing it when STSI is called
>> 2. So they show up in info qtree
>>
>> ?
> 
> and have the possibility to add the objects dynamically. yes
> 
[...]
Pierre Morel July 14, 2022, 7:26 p.m. UTC | #11
On 7/14/22 14:50, Janis Schoetterl-Glausch wrote:
> On 7/14/22 13:25, Pierre Morel wrote:
> 
> [...]
> 
>>
>> That is sure.
>> I thought about putting a fatal error report during the initialization in s390_topology_setup().
>>
>>> And you can set thread > 1 today, so we'd need to handle that. (increase the number of cpus instead and print a warning?)
>>>
>>> [...]
>>
>> this would introduce arch dependencies in the hw/core/
>> I think that the error report for Z is enough.
>>
>> So once we support multithreading in the guest, we can adjust it more easily without involving the common code.
>>
>> Or we can introduce a thread_supported in SMPCompatProps, which would be good.
>> I would prefer to propose this outside of the series and suppress the fatal error once it is adopted.
>>
> 
> Yeah, it could be a separate series, but then the question remains what you do in this one, that is,
> whether you change the code so it would be correct if multithreading were supported.

I would like to not support multithreading at first and report a fatal
error if threads are defined, or implicitly defined, as different from 1.

I prefer to keep multithreading for later; I have not looked at all
the implications for the moment.

>>>
>>>>>> +
>>>>>> +/*
>>>>>> + * Setting the first topology: 1 book, 1 socket
>>>>>> + * This is enough for 64 cores if the topology is flat (single socket)
>>>>>> + */
>>>>>> +void s390_topology_setup(MachineState *ms)
>>>>>> +{
>>>>>> +    DeviceState *dev;
>>>>>> +
>>>>>> +    /* Create BOOK bridge device */
>>>>>> +    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
>>>>>> +    object_property_add_child(qdev_get_machine(),
>>>>>> +                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
>>>>>
>>>>> Why add it to the machine instead of directly using a static?
>>>>
>>>> In my opinion it is a characteristic of the machine.
>>>>
>>>>> So it's visible to the user via info qtree or something?
>>>>
>>>> It is already visible to the user on info qtree.
>>>>
>>>>> Would that even be the appropriate location to show that?
>>>>
>>>> That is a very good question and I really appreciate that we discuss the design before diving into details.
>>>>
>>>> The idea is to have the architecture details present in the qtree as objects, so we can plug new drawers/books/sockets/cores and, in the future, when the infrastructure allows it, unplug them.
>>>
>>> Would it not be more accurate to say that we plug in new cpus only?
>>> Since you need to specify the topology up front with -smp and it cannot change after.
>>
>> smp specifies the maximum we can have.
>> I thought we could add elements dynamically inside this maximum set.
>>
>>> So that all is static, books/sockets might be completely unpopulated, but they still exist in a way.
>>> As far as I understand, STSI only allows for cpus to change, nothing above it.
>>
>> I thought we want to plug new books or drawers but I may be wrong.
> 
> So you want to be able to plug in, for example, a socket without any cpus in it?
> I'm not seeing anything in the description of STSI that forbids having empty containers
> or containers with a cpu entry without any cpus. But I don't know why that would be useful.
> And if you don't want empty containers, then the container will just show up when plugging in the cpu.

You already convinced me, it makes no sense and, anyway, building every
container when a CPU is added is how it works with the current
implementation.
Thomas Huth Aug. 23, 2022, 1:30 p.m. UTC | #12
On 20/06/2022 16.03, Pierre Morel wrote:
> We use new objects to have a dynamic administration of the CPU topology.
> The highest level object in this implementation is the s390 book and
> in this first implementation of CPU topology for S390 we have a single
> book.
> The book is built as a SYSBUS bridge during the CPU initialization.
> Other objects, sockets and core will be built after the parsing
> of the QEMU -smp argument.
> 
> Every object under this single book will be build dynamically
> immediately after a CPU has be realized if it is needed.
> The CPU will fill the sockets once after the other, according to the
> number of core per socket defined during the smp parsing.
> 
> Each CPU inside a socket will be represented by a bit in a 64bit
> unsigned long. Set on plug and clear on unplug of a CPU.
> 
> For the S390 CPU topology, thread and cores are merged into
> topology cores and the number of topology cores is the multiplication
> of cores by the numbers of threads.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>   hw/s390x/meson.build            |   1 +
>   hw/s390x/s390-virtio-ccw.c      |   6 +
>   include/hw/s390x/cpu-topology.h |  74 ++++++
>   target/s390x/cpu.h              |  47 ++++
>   5 files changed, 519 insertions(+)
>   create mode 100644 hw/s390x/cpu-topology.c
>   create mode 100644 include/hw/s390x/cpu-topology.h
...
> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
> +{
> +    S390TopologyBook *book;
> +    S390TopologySocket *socket;
> +    S390TopologyCores *cores;
> +    int nb_cores_per_socket;
> +    int origin, bit;
> +
> +    book = s390_get_topology();
> +
> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
> +
> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, errp);
> +    if (!socket) {
> +        return false;
> +    }
> +
> +    /*
> +     * At the core level, each CPU is represented by a bit in a 64bit
> +     * unsigned long. Set on plug and clear on unplug of a CPU.
> +     * The firmware assume that all CPU in the core description have the same
> +     * type, polarization and are all dedicated or shared.
> +     * In the case a socket contains CPU with different type, polarization
> +     * or dedication then they will be defined in different CPU containers.
> +     * Currently we assume all CPU are identical and the only reason to have
> +     * several S390TopologyCores inside a socket is to have more than 64 CPUs
> +     * in that case the origin field, representing the offset of the first CPU
> +     * in the CPU container allows to represent up to the maximal number of
> +     * CPU inside several CPU containers inside the socket container.
> +     */
> +    origin = 64 * (core_id / 64);

Maybe faster:

	origin = core_id & ~63;

By the way, where is this limitation to 64 coming from? Just because we're 
using a "unsigned long" for now? Or is this a limitation from the architecture?

> +    cores = s390_get_cores(ms, socket, origin, errp);
> +    if (!cores) {
> +        return false;
> +    }
> +
> +    bit = 63 - (core_id - origin);
> +    set_bit(bit, &cores->mask);
> +    cores->origin = origin;
> +
> +    return true;
> +}
...
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index cc3097bfee..a586875b24 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -43,6 +43,7 @@
>   #include "sysemu/sysemu.h"
>   #include "hw/s390x/pv.h"
>   #include "migration/blocker.h"
> +#include "hw/s390x/cpu-topology.h"
>   
>   static Error *pv_mig_blocker;
>   
> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>       /* initialize possible_cpus */
>       mc->possible_cpu_arch_ids(machine);
>   
> +    s390_topology_setup(machine);

Is this safe with regards to migration? Did you try a ping-pong migration
from an older QEMU to a QEMU with your modifications and back to the older 
one? If it does not work, we might need to wire this setup to the machine 
types...

>       for (i = 0; i < machine->smp.cpus; i++) {
>           s390x_new_cpu(machine->cpu_type, i, &error_fatal);
>       }
> @@ -306,6 +308,10 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev,
>       g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
>       ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
>   
> +    if (!s390_topology_new_cpu(ms, cpu->env.core_id, errp)) {
> +        return;
> +    }
> +
>       if (dev->hotplugged) {
>           raise_irq_cpu_hotplug();
>       }

  Thomas
Pierre Morel Aug. 23, 2022, 4:30 p.m. UTC | #13
On 8/23/22 15:30, Thomas Huth wrote:
> On 20/06/2022 16.03, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highest level object in this implementation is the s390 book and
>> in this first implementation of CPU topology for S390 we have a single
>> book.
>> The book is built as a SYSBUS bridge during the CPU initialization.
>> Other objects, sockets and core will be built after the parsing
>> of the QEMU -smp argument.
>>
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> For the S390 CPU topology, thread and cores are merged into
>> topology cores and the number of topology cores is the multiplication
>> of cores by the numbers of threads.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>>   hw/s390x/meson.build            |   1 +
>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>   target/s390x/cpu.h              |  47 ++++
>>   5 files changed, 519 insertions(+)
>>   create mode 100644 hw/s390x/cpu-topology.c
>>   create mode 100644 include/hw/s390x/cpu-topology.h
> ...
>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>> +{
>> +    S390TopologyBook *book;
>> +    S390TopologySocket *socket;
>> +    S390TopologyCores *cores;
>> +    int nb_cores_per_socket;
>> +    int origin, bit;
>> +
>> +    book = s390_get_topology();
>> +
>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>> +
>> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, 
>> errp);
>> +    if (!socket) {
>> +        return false;
>> +    }
>> +
>> +    /*
>> +     * At the core level, each CPU is represented by a bit in a 64bit
>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>> +     * The firmware assume that all CPU in the core description have 
>> the same
>> +     * type, polarization and are all dedicated or shared.
>> +     * In the case a socket contains CPU with different type, 
>> polarization
>> +     * or dedication then they will be defined in different CPU 
>> containers.
>> +     * Currently we assume all CPU are identical and the only reason 
>> to have
>> +     * several S390TopologyCores inside a socket is to have more than 
>> 64 CPUs
>> +     * in that case the origin field, representing the offset of the 
>> first CPU
>> +     * in the CPU container allows to represent up to the maximal 
>> number of
>> +     * CPU inside several CPU containers inside the socket container.
>> +     */
>> +    origin = 64 * (core_id / 64);
> 
> Maybe faster:
> 
>      origin = core_id & ~63;

yes thanks

> 
> By the way, where is this limitation to 64 coming from? Just because 
> we're using a "unsigned long" for now? Or is this a limitation from the 
> architecture?
> 

It is a limitation from the architecture, which uses a 64-bit field to
represent the CPUs in a CPU TLE mask.
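
With the formulas from this patch, taking core_id 70 as an example:

    origin = 64 * (core_id / 64);      /* 64: the origin field of the TLE  */
    bit    = 63 - (core_id - origin);  /* 57: bit position inside the mask */
    set_bit(bit, &cores->mask);        /* CPU 70 is bit 57 of the CPU TLE
                                        * whose origin is 64               */

so one CPU TLE covers at most 64 consecutive core IDs, and a second TLE
with origin 64 is needed as soon as a core_id goes above 63.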

But this patch series is quite old now and I changed some things
according to the comments I received.
I plan to send the new version before the end of the week.


>> +    cores = s390_get_cores(ms, socket, origin, errp);
>> +    if (!cores) {
>> +        return false;
>> +    }
>> +
>> +    bit = 63 - (core_id - origin);
>> +    set_bit(bit, &cores->mask);
>> +    cores->origin = origin;
>> +
>> +    return true;
>> +}
> ...
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index cc3097bfee..a586875b24 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -43,6 +43,7 @@
>>   #include "sysemu/sysemu.h"
>>   #include "hw/s390x/pv.h"
>>   #include "migration/blocker.h"
>> +#include "hw/s390x/cpu-topology.h"
>>   static Error *pv_mig_blocker;
>> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>>       /* initialize possible_cpus */
>>       mc->possible_cpu_arch_ids(machine);
>> +    s390_topology_setup(machine);
> 
> Is this safe with regards to migration? Did you try a ping-pong
> migration from an older QEMU to a QEMU with your modifications and back 
> to the older one? If it does not work, we might need to wire this setup 
> to the machine types...

I did not, I will add this test.


> 
>>       for (i = 0; i < machine->smp.cpus; i++) {
>>           s390x_new_cpu(machine->cpu_type, i, &error_fatal);
>>       }
>> @@ -306,6 +308,10 @@ static void s390_cpu_plug(HotplugHandler 
>> *hotplug_dev,
>>       g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
>>       ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
>> +    if (!s390_topology_new_cpu(ms, cpu->env.core_id, errp)) {
>> +        return;
>> +    }
>> +
>>       if (dev->hotplugged) {
>>           raise_irq_cpu_hotplug();
>>       }
> 
>   Thomas
> 

Thanks Thomas,

Regards,
Pierre
Pierre Morel Aug. 23, 2022, 5:41 p.m. UTC | #14
On 8/23/22 15:30, Thomas Huth wrote:
> On 20/06/2022 16.03, Pierre Morel wrote:
>> We use new objects to have a dynamic administration of the CPU topology.
>> The highest level object in this implementation is the s390 book and
>> in this first implementation of CPU topology for S390 we have a single
>> book.
>> The book is built as a SYSBUS bridge during the CPU initialization.
>> Other objects, sockets and core will be built after the parsing
>> of the QEMU -smp argument.
>>
>> Every object under this single book will be build dynamically
>> immediately after a CPU has be realized if it is needed.
>> The CPU will fill the sockets once after the other, according to the
>> number of core per socket defined during the smp parsing.
>>
>> Each CPU inside a socket will be represented by a bit in a 64bit
>> unsigned long. Set on plug and clear on unplug of a CPU.
>>
>> For the S390 CPU topology, thread and cores are merged into
>> topology cores and the number of topology cores is the multiplication
>> of cores by the numbers of threads.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>>   hw/s390x/meson.build            |   1 +
>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>   target/s390x/cpu.h              |  47 ++++
>>   5 files changed, 519 insertions(+)
>>   create mode 100644 hw/s390x/cpu-topology.c
>>   create mode 100644 include/hw/s390x/cpu-topology.h
> ...
>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>> +{
>> +    S390TopologyBook *book;
>> +    S390TopologySocket *socket;
>> +    S390TopologyCores *cores;
>> +    int nb_cores_per_socket;
>> +    int origin, bit;
>> +
>> +    book = s390_get_topology();
>> +
>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>> +
>> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, 
>> errp);
>> +    if (!socket) {
>> +        return false;
>> +    }
>> +
>> +    /*
>> +     * At the core level, each CPU is represented by a bit in a 64bit
>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>> +     * The firmware assume that all CPU in the core description have 
>> the same
>> +     * type, polarization and are all dedicated or shared.
>> +     * In the case a socket contains CPU with different type, 
>> polarization
>> +     * or dedication then they will be defined in different CPU 
>> containers.
>> +     * Currently we assume all CPU are identical and the only reason 
>> to have
>> +     * several S390TopologyCores inside a socket is to have more than 
>> 64 CPUs
>> +     * in that case the origin field, representing the offset of the 
>> first CPU
>> +     * in the CPU container allows to represent up to the maximal 
>> number of
>> +     * CPU inside several CPU containers inside the socket container.
>> +     */
>> +    origin = 64 * (core_id / 64);
> 
> Maybe faster:
> 
>      origin = core_id & ~63;
> 
> By the way, where is this limitation to 64 coming from? Just because 
> we're using a "unsigned long" for now? Or is this a limitation from the 
> architecture?
> 
>> +    cores = s390_get_cores(ms, socket, origin, errp);
>> +    if (!cores) {
>> +        return false;
>> +    }
>> +
>> +    bit = 63 - (core_id - origin);
>> +    set_bit(bit, &cores->mask);
>> +    cores->origin = origin;
>> +
>> +    return true;
>> +}
> ...
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index cc3097bfee..a586875b24 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -43,6 +43,7 @@
>>   #include "sysemu/sysemu.h"
>>   #include "hw/s390x/pv.h"
>>   #include "migration/blocker.h"
>> +#include "hw/s390x/cpu-topology.h"
>>   static Error *pv_mig_blocker;
>> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>>       /* initialize possible_cpus */
>>       mc->possible_cpu_arch_ids(machine);
>> +    s390_topology_setup(machine);
> 
> Is this safe with regards to migration? Did you try a ping-pong
> migration from an older QEMU to a QEMU with your modifications and back 
> to the older one? If it does not work, we might need to wire this setup 
> to the machine types...

I checked with the follow-up series:
OLD -> NEW -> OLD -> NEW

It is working fine, of course we need to fence the CPU topology facility 
with ctop=off on the new QEMU to avoid authorizing the new instructions, 
PTF and STSI(15).
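
(I.e. starting the new QEMU with something like

    qemu-system-s390x -M s390-ccw-virtio-7.0 -cpu host,ctop=off ...

the machine type and CPU model above are only an example; the point is
that with ctop off the guest sees the same features on both sides.)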

The new series will also be much simpler: 725 LOCs including
documentation against ... 1377 without documentation.

I dropped a lot of QEMU objects that did not really have a use, on the
advice of Janis, and also simplified the STSI handling.

I still need to add more comments, so it will grow again, but for a good
reason.

Regards,
Pierre
Thomas Huth Aug. 24, 2022, 7:30 a.m. UTC | #15
On 23/08/2022 19.41, Pierre Morel wrote:
> 
> 
> On 8/23/22 15:30, Thomas Huth wrote:
>> On 20/06/2022 16.03, Pierre Morel wrote:
>>> We use new objects to have a dynamic administration of the CPU topology.
>>> The highest level object in this implementation is the s390 book and
>>> in this first implementation of CPU topology for S390 we have a single
>>> book.
>>> The book is built as a SYSBUS bridge during the CPU initialization.
>>> Other objects, sockets and core will be built after the parsing
>>> of the QEMU -smp argument.
>>>
>>> Every object under this single book will be build dynamically
>>> immediately after a CPU has be realized if it is needed.
>>> The CPU will fill the sockets once after the other, according to the
>>> number of core per socket defined during the smp parsing.
>>>
>>> Each CPU inside a socket will be represented by a bit in a 64bit
>>> unsigned long. Set on plug and clear on unplug of a CPU.
>>>
>>> For the S390 CPU topology, thread and cores are merged into
>>> topology cores and the number of topology cores is the multiplication
>>> of cores by the numbers of threads.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
>>>   hw/s390x/cpu-topology.c         | 391 ++++++++++++++++++++++++++++++++
>>>   hw/s390x/meson.build            |   1 +
>>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>>   target/s390x/cpu.h              |  47 ++++
>>>   5 files changed, 519 insertions(+)
>>>   create mode 100644 hw/s390x/cpu-topology.c
>>>   create mode 100644 include/hw/s390x/cpu-topology.h
>> ...
>>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
>>> +{
>>> +    S390TopologyBook *book;
>>> +    S390TopologySocket *socket;
>>> +    S390TopologyCores *cores;
>>> +    int nb_cores_per_socket;
>>> +    int origin, bit;
>>> +
>>> +    book = s390_get_topology();
>>> +
>>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>>> +
>>> +    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, 
>>> errp);
>>> +    if (!socket) {
>>> +        return false;
>>> +    }
>>> +
>>> +    /*
>>> +     * At the core level, each CPU is represented by a bit in a 64bit
>>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>>> +     * The firmware assume that all CPU in the core description have the 
>>> same
>>> +     * type, polarization and are all dedicated or shared.
>>> +     * In the case a socket contains CPU with different type, polarization
>>> +     * or dedication then they will be defined in different CPU containers.
>>> +     * Currently we assume all CPU are identical and the only reason to 
>>> have
>>> +     * several S390TopologyCores inside a socket is to have more than 64 
>>> CPUs
>>> +     * in that case the origin field, representing the offset of the 
>>> first CPU
>>> +     * in the CPU container allows to represent up to the maximal number of
>>> +     * CPU inside several CPU containers inside the socket container.
>>> +     */
>>> +    origin = 64 * (core_id / 64);
>>
>> Maybe faster:
>>
>>      origin = core_id & ~63;
>>
>> By the way, where is this limitation to 64 coming from? Just because we're 
>> using a "unsigned long" for now? Or is this a limitation from the 
>> architecture?
>>
>>> +    cores = s390_get_cores(ms, socket, origin, errp);
>>> +    if (!cores) {
>>> +        return false;
>>> +    }
>>> +
>>> +    bit = 63 - (core_id - origin);
>>> +    set_bit(bit, &cores->mask);
>>> +    cores->origin = origin;
>>> +
>>> +    return true;
>>> +}
>> ...
>>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>>> index cc3097bfee..a586875b24 100644
>>> --- a/hw/s390x/s390-virtio-ccw.c
>>> +++ b/hw/s390x/s390-virtio-ccw.c
>>> @@ -43,6 +43,7 @@
>>>   #include "sysemu/sysemu.h"
>>>   #include "hw/s390x/pv.h"
>>>   #include "migration/blocker.h"
>>> +#include "hw/s390x/cpu-topology.h"
>>>   static Error *pv_mig_blocker;
>>> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>>>       /* initialize possible_cpus */
>>>       mc->possible_cpu_arch_ids(machine);
>>> +    s390_topology_setup(machine);
>>
>> Is this safe with regards to migration? Did you try a ping-pong
>> migration from an older QEMU to a QEMU with your modifications and back to 
>> the older one? If it does not work, we might need to wire this setup to 
>> the machine types...
> 
> I checked with the follow-up series:
> OLD -> NEW -> OLD -> NEW
> 
> It is working fine, of course we need to fence the CPU topology facility 
> with ctop=off on the new QEMU to avoid authorizing the new instructions, PTF 
> and STSI(15).

When using an older machine type, the facility should be disabled by 
default, so the user does not have to know that ctop=off has to be set ... 
so I think you should only do the s390_topology_setup() by default if using 
the 7.2 machine type (or newer).

  Thomas
Pierre Morel Aug. 24, 2022, 8:41 a.m. UTC | #16
On 8/24/22 09:30, Thomas Huth wrote:
> On 23/08/2022 19.41, Pierre Morel wrote:
>>
>>
>> On 8/23/22 15:30, Thomas Huth wrote:
>>> On 20/06/2022 16.03, Pierre Morel wrote:
>>>> We use new objects to have a dynamic administration of the CPU 
>>>> topology.
>>>> The highest level object in this implementation is the s390 book and
>>>> in this first implementation of CPU topology for S390 we have a single
>>>> book.
>>>> The book is built as a SYSBUS bridge during the CPU initialization.
>>>> Other objects, sockets and core will be built after the parsing
>>>> of the QEMU -smp argument.
>>>>
>>>> Every object under this single book will be build dynamically
>>>> immediately after a CPU has be realized if it is needed.
>>>> The CPU will fill the sockets once after the other, according to the
>>>> number of core per socket defined during the smp parsing.
>>>>
>>>> Each CPU inside a socket will be represented by a bit in a 64bit
>>>> unsigned long. Set on plug and clear on unplug of a CPU.
>>>>
>>>> For the S390 CPU topology, thread and cores are merged into
>>>> topology cores and the number of topology cores is the multiplication
>>>> of cores by the numbers of threads.
>>>>
>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>> ---
>>>>   hw/s390x/cpu-topology.c         | 391 
>>>> ++++++++++++++++++++++++++++++++
>>>>   hw/s390x/meson.build            |   1 +
>>>>   hw/s390x/s390-virtio-ccw.c      |   6 +
>>>>   include/hw/s390x/cpu-topology.h |  74 ++++++
>>>>   target/s390x/cpu.h              |  47 ++++
>>>>   5 files changed, 519 insertions(+)
>>>>   create mode 100644 hw/s390x/cpu-topology.c
>>>>   create mode 100644 include/hw/s390x/cpu-topology.h
>>> ...
>>>> +bool s390_topology_new_cpu(MachineState *ms, int core_id, Error 
>>>> **errp)
>>>> +{
>>>> +    S390TopologyBook *book;
>>>> +    S390TopologySocket *socket;
>>>> +    S390TopologyCores *cores;
>>>> +    int nb_cores_per_socket;
>>>> +    int origin, bit;
>>>> +
>>>> +    book = s390_get_topology();
>>>> +
>>>> +    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
>>>> +
>>>> +    socket = s390_get_socket(ms, book, core_id / 
>>>> nb_cores_per_socket, errp);
>>>> +    if (!socket) {
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * At the core level, each CPU is represented by a bit in a 64bit
>>>> +     * unsigned long. Set on plug and clear on unplug of a CPU.
>>>> +     * The firmware assume that all CPU in the core description 
>>>> have the same
>>>> +     * type, polarization and are all dedicated or shared.
>>>> +     * In the case a socket contains CPU with different type, 
>>>> polarization
>>>> +     * or dedication then they will be defined in different CPU 
>>>> containers.
>>>> +     * Currently we assume all CPU are identical and the only 
>>>> reason to have
>>>> +     * several S390TopologyCores inside a socket is to have more 
>>>> than 64 CPUs
>>>> +     * in that case the origin field, representing the offset of 
>>>> the first CPU
>>>> +     * in the CPU container allows to represent up to the maximal 
>>>> number of
>>>> +     * CPU inside several CPU containers inside the socket container.
>>>> +     */
>>>> +    origin = 64 * (core_id / 64);
>>>
>>> Maybe faster:
>>>
>>>      origin = core_id & ~63;
>>>
>>> By the way, where is this limitation to 64 coming from? Just because 
>>> we're using a "unsigned long" for now? Or is this a limitation from 
>>> the architecture?
>>>
>>>> +    cores = s390_get_cores(ms, socket, origin, errp);
>>>> +    if (!cores) {
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    bit = 63 - (core_id - origin);
>>>> +    set_bit(bit, &cores->mask);
>>>> +    cores->origin = origin;
>>>> +
>>>> +    return true;
>>>> +}
>>> ...
>>>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>>>> index cc3097bfee..a586875b24 100644
>>>> --- a/hw/s390x/s390-virtio-ccw.c
>>>> +++ b/hw/s390x/s390-virtio-ccw.c
>>>> @@ -43,6 +43,7 @@
>>>>   #include "sysemu/sysemu.h"
>>>>   #include "hw/s390x/pv.h"
>>>>   #include "migration/blocker.h"
>>>> +#include "hw/s390x/cpu-topology.h"
>>>>   static Error *pv_mig_blocker;
>>>> @@ -89,6 +90,7 @@ static void s390_init_cpus(MachineState *machine)
>>>>       /* initialize possible_cpus */
>>>>       mc->possible_cpu_arch_ids(machine);
>>>> +    s390_topology_setup(machine);
>>>
>>> Is this safe with regards to migration? Did you tried a ping-pong 
>>> migration from an older QEMU to a QEMU with your modifications and 
>>> back to the older one? If it does not work, we might need to wire 
>>> this setup to the machine types...
>>
>> I checked with the follow-up series :
>> OLD-> NEW -> OLD -> NEW
>>
>> It is working fine, of course we need to fence the CPU topology 
>> facility with ctop=off on the new QEMU to avoid authorizing the new 
>> instructions, PTF and STSI(15).
> 
> When using an older machine type, the facility should be disabled by 
> default, so the user does not have to know that ctop=off has to be set 
> ... so I think you should only do the s390_topology_setup() by default 
> if using the 7.2 machine type (or newer).
> 
>   Thomas
> 

Oh OK, thanks.
I will add this for the next series, of course.

Regards,
Pierre
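
As an aside on the origin/mask arithmetic discussed in the review above
(illustrative only, nothing here is part of the series): the 64 presumably
comes from the 64-bit mask field of the CPU-type TLE (SysIBTl_cpu in the
patch below), and the two ways of computing the origin agree for
non-negative core IDs:

#include <assert.h>

/* Sketch: map a machine-wide core_id to its CPU container and mask bit. */
static void core_id_to_mask_position(int core_id, int *origin, int *bit)
{
    assert(core_id >= 0);
    *origin = core_id & ~63;           /* same result as 64 * (core_id / 64) */
    *bit = 63 - (core_id - *origin);   /* MSB-first position inside the mask */
}

For example, core_id 70 yields origin 64 and bit 57.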

Patch

diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 0000000000..0fd6f08084
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,391 @@ 
+/*
+ * CPU Topology
+ *
+ * Copyright 2022 IBM Corp.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/cpu-topology.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+
+/*
+ * s390_create_cores:
+ * @ms: Machine state
+ * @socket: the socket on which to create the core set
+ * @origin: the origin offset of the first core of the set
+ * @errp: Error pointer
+ *
+ * returns a pointer to the created S390TopologyCores structure
+ *
+ * On error: return NULL
+ */
+static S390TopologyCores *s390_create_cores(MachineState *ms,
+                                            S390TopologySocket *socket,
+                                            int origin, Error **errp)
+{
+    DeviceState *dev;
+    S390TopologyCores *cores;
+
+    if (socket->bus->num_children >= ms->smp.cores * ms->smp.threads) {
+        error_setg(errp, "Unable to create more cores.");
+        return NULL;
+    }
+
+    dev = qdev_new(TYPE_S390_TOPOLOGY_CORES);
+    qdev_realize_and_unref(dev, socket->bus, &error_fatal);
+
+    cores = S390_TOPOLOGY_CORES(dev);
+    cores->origin = origin;
+    socket->cnt += 1;
+
+    return cores;
+}
+
+/*
+ * s390_create_socket:
+ * @ms: Machine state
+ * @book: the book on which to create the socket
+ * @id: the socket id
+ * @errp: Error pointer
+ *
+ * returns a pointer to the created S390TopologySocket structure
+ *
+ * On error: return NULL
+ */
+static S390TopologySocket *s390_create_socket(MachineState *ms,
+                                              S390TopologyBook *book,
+                                              int id, Error **errp)
+{
+    DeviceState *dev;
+    S390TopologySocket *socket;
+
+    if (book->bus->num_children >= ms->smp.sockets) {
+        error_setg(errp, "Unable to create more sockets.");
+        return NULL;
+    }
+
+    dev = qdev_new(TYPE_S390_TOPOLOGY_SOCKET);
+    qdev_realize_and_unref(dev, book->bus, &error_fatal);
+
+    socket = S390_TOPOLOGY_SOCKET(dev);
+    socket->socket_id = id;
+    book->cnt++;
+
+    return socket;
+}
+
+/*
+ * s390_get_cores:
+ * @ms: Machine state
+ * @socket: the socket to search into
+ * @origin: the origin specified for the S390TopologyCores
+ * @errp: Error pointer
+ *
+ * returns a pointer to a S390TopologyCores structure within a socket having
+ * the specified origin.
+ * First search whether the socket already contains a S390TopologyCores
+ * structure with this origin and, if not, create one.
+ */
+static S390TopologyCores *s390_get_cores(MachineState *ms,
+                                         S390TopologySocket *socket,
+                                         int origin, Error **errp)
+{
+    S390TopologyCores *cores;
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &socket->bus->children, sibling) {
+        cores = S390_TOPOLOGY_CORES(kid->child);
+        if (cores->origin == origin) {
+            return cores;
+        }
+    }
+    return s390_create_cores(ms, socket, origin, errp);
+}
+
+/*
+ * s390_get_socket:
+ * @ms: Machine state
+ * @book: The book to search into
+ * @socket_id: the identifier of the socket to search for
+ * @errp: Error pointer
+ *
+ * returns a pointer to a S390TopologySocket structure within a book having
+ * the specified socket_id.
+ * First search whether the book already contains a S390TopologySocket
+ * structure with this socket_id and, if not, create one.
+ */
+static S390TopologySocket *s390_get_socket(MachineState *ms,
+                                           S390TopologyBook *book,
+                                           int socket_id, Error **errp)
+{
+    S390TopologySocket *socket;
+    BusChild *kid;
+
+    QTAILQ_FOREACH(kid, &book->bus->children, sibling) {
+        socket = S390_TOPOLOGY_SOCKET(kid->child);
+        if (socket->socket_id == socket_id) {
+            return socket;
+        }
+    }
+    return s390_create_socket(ms, book, socket_id, errp);
+}
+
+/*
+ * s390_topology_new_cpu:
+ * @core_id: the core ID is machine wide
+ *
+ * We have a single book returned by s390_get_topology(),
+ * then we build the hierarchy on demand.
+ * Note that we do not destroy the hierarchy on an error while creating
+ * an entry in the topology, we just keep it empty.
+ * We do not need to worry about not finding a topology level entry;
+ * this would have been caught during smp parsing.
+ */
+bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp)
+{
+    S390TopologyBook *book;
+    S390TopologySocket *socket;
+    S390TopologyCores *cores;
+    int nb_cores_per_socket;
+    int origin, bit;
+
+    book = s390_get_topology();
+
+    nb_cores_per_socket = ms->smp.cores * ms->smp.threads;
+
+    socket = s390_get_socket(ms, book, core_id / nb_cores_per_socket, errp);
+    if (!socket) {
+        return false;
+    }
+
+    /*
+     * At the core level, each CPU is represented by a bit in a 64bit
+     * unsigned long. Set on plug and clear on unplug of a CPU.
+     * The firmware assumes that all CPUs in a core description have the
+     * same type and polarization and are all either dedicated or shared.
+     * If a socket contains CPUs with different type, polarization or
+     * dedication, they are put into different CPU containers.
+     * Currently we assume all CPUs are identical, so the only reason to have
+     * several S390TopologyCores inside a socket is to have more than 64 CPUs.
+     * In that case the origin field, the offset of the first CPU in the
+     * CPU container, allows representing up to the maximum number of CPUs
+     * spread over several CPU containers inside the socket container.
+     */
+    origin = 64 * (core_id / 64);
+
+    cores = s390_get_cores(ms, socket, origin, errp);
+    if (!cores) {
+        return false;
+    }
+
+    bit = 63 - (core_id - origin);
+    set_bit(bit, &cores->mask);
+    cores->origin = origin;
+
+    return true;
+}
+
+/*
+ * Setting the first topology: 1 book, 1 socket
+ * This is enough for 64 cores if the topology is flat (single socket)
+ */
+void s390_topology_setup(MachineState *ms)
+{
+    DeviceState *dev;
+
+    /* Create BOOK bridge device */
+    dev = qdev_new(TYPE_S390_TOPOLOGY_BOOK);
+    object_property_add_child(qdev_get_machine(),
+                              TYPE_S390_TOPOLOGY_BOOK, OBJECT(dev));
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+}
+
+S390TopologyBook *s390_get_topology(void)
+{
+    static S390TopologyBook *book;
+
+    if (!book) {
+        book = S390_TOPOLOGY_BOOK(
+            object_resolve_path(TYPE_S390_TOPOLOGY_BOOK, NULL));
+        assert(book != NULL);
+    }
+
+    return book;
+}
+
+/* --- CORES Definitions --- */
+
+static Property s390_topology_cores_properties[] = {
+    DEFINE_PROP_BOOL("dedicated", S390TopologyCores, dedicated, false),
+    DEFINE_PROP_UINT8("polarity", S390TopologyCores, polarity,
+                      S390_TOPOLOGY_POLARITY_H),
+    DEFINE_PROP_UINT8("cputype", S390TopologyCores, cputype,
+                      S390_TOPOLOGY_CPU_TYPE),
+    DEFINE_PROP_UINT16("origin", S390TopologyCores, origin, 0),
+    DEFINE_PROP_UINT64("mask", S390TopologyCores, mask, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void cpu_cores_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    device_class_set_props(dc, s390_topology_cores_properties);
+    hc->unplug = qdev_simple_device_unplug_cb;
+    dc->bus_type = TYPE_S390_TOPOLOGY_SOCKET_BUS;
+    dc->desc = "topology cpu entry";
+}
+
+static const TypeInfo cpu_cores_info = {
+    .name          = TYPE_S390_TOPOLOGY_CORES,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(S390TopologyCores),
+    .class_init    = cpu_cores_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static char *socket_bus_get_dev_path(DeviceState *dev)
+{
+    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
+    DeviceState *book = dev->parent_bus->parent;
+    char *id = qdev_get_dev_path(book);
+    char *ret;
+
+    if (id) {
+        ret = g_strdup_printf("%s:%02d", id, socket->socket_id);
+        g_free(id);
+    } else {
+        ret = g_strdup_printf("_:%02d", socket->socket_id);
+    }
+
+    return ret;
+}
+
+static void socket_bus_class_init(ObjectClass *oc, void *data)
+{
+    BusClass *k = BUS_CLASS(oc);
+
+    k->get_dev_path = socket_bus_get_dev_path;
+    k->max_dev = S390_MAX_SOCKETS;
+}
+
+static const TypeInfo socket_bus_info = {
+    .name = TYPE_S390_TOPOLOGY_SOCKET_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = 0,
+    .class_init = socket_bus_class_init,
+};
+
+static void s390_socket_device_realize(DeviceState *dev, Error **errp)
+{
+    S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(dev);
+    BusState *bus;
+
+    bus = qbus_new(TYPE_S390_TOPOLOGY_SOCKET_BUS, dev,
+                   TYPE_S390_TOPOLOGY_SOCKET_BUS);
+    qbus_set_hotplug_handler(bus, OBJECT(dev));
+    socket->bus = bus;
+}
+
+static void socket_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    hc->unplug = qdev_simple_device_unplug_cb;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->bus_type = TYPE_S390_TOPOLOGY_BOOK_BUS;
+    dc->realize = s390_socket_device_realize;
+    dc->desc = "topology socket";
+}
+
+static const TypeInfo socket_info = {
+    .name          = TYPE_S390_TOPOLOGY_SOCKET,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(S390TopologySocket),
+    .class_init    = socket_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static char *book_bus_get_dev_path(DeviceState *dev)
+{
+    return g_strdup("00");
+}
+
+static void book_bus_class_init(ObjectClass *oc, void *data)
+{
+    BusClass *k = BUS_CLASS(oc);
+
+    k->get_dev_path = book_bus_get_dev_path;
+    k->max_dev = S390_MAX_BOOKS;
+}
+
+static const TypeInfo book_bus_info = {
+    .name = TYPE_S390_TOPOLOGY_BOOK_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = 0,
+    .class_init = book_bus_class_init,
+};
+
+static void s390_book_device_realize(DeviceState *dev, Error **errp)
+{
+    S390TopologyBook *book = S390_TOPOLOGY_BOOK(dev);
+    BusState *bus;
+
+    bus = qbus_new(TYPE_S390_TOPOLOGY_BOOK_BUS, dev,
+                   TYPE_S390_TOPOLOGY_BOOK_BUS);
+    qbus_set_hotplug_handler(bus, OBJECT(dev));
+    book->bus = bus;
+}
+
+static void book_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+    hc->unplug = qdev_simple_device_unplug_cb;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+    dc->realize = s390_book_device_realize;
+    dc->desc = "topology book";
+}
+
+static const TypeInfo book_info = {
+    .name          = TYPE_S390_TOPOLOGY_BOOK,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(S390TopologyBook),
+    .class_init    = book_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
+};
+
+static void topology_register(void)
+{
+    type_register_static(&cpu_cores_info);
+    type_register_static(&socket_bus_info);
+    type_register_static(&socket_info);
+    type_register_static(&book_bus_info);
+    type_register_static(&book_info);
+}
+
+type_init(topology_register);
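
For readers tracing the hierarchy built by the file above, here is a small
illustrative helper (not part of the patch; it assumes qemu_printf from
"qemu/qemu-print.h") that walks book -> sockets -> cores the same way
s390_get_socket() and s390_get_cores() do:

/* Illustrative only: dump the core mask of every CPU container. */
static void s390_topology_dump(void)
{
    S390TopologyBook *book = s390_get_topology();
    BusChild *skid, *ckid;

    QTAILQ_FOREACH(skid, &book->bus->children, sibling) {
        S390TopologySocket *socket = S390_TOPOLOGY_SOCKET(skid->child);

        QTAILQ_FOREACH(ckid, &socket->bus->children, sibling) {
            S390TopologyCores *cores = S390_TOPOLOGY_CORES(ckid->child);

            qemu_printf("socket %d: origin %u mask 0x%016" PRIx64 "\n",
                        socket->socket_id, cores->origin, cores->mask);
        }
    }
}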
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index feefe0717e..3592fa952b 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -2,6 +2,7 @@  s390x_ss = ss.source_set()
 s390x_ss.add(files(
   'ap-bridge.c',
   'ap-device.c',
+  'cpu-topology.c',
   'ccw-device.c',
   'css-bridge.c',
   'css.c',
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index cc3097bfee..a586875b24 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -43,6 +43,7 @@ 
 #include "sysemu/sysemu.h"
 #include "hw/s390x/pv.h"
 #include "migration/blocker.h"
+#include "hw/s390x/cpu-topology.h"
 
 static Error *pv_mig_blocker;
 
@@ -89,6 +90,7 @@  static void s390_init_cpus(MachineState *machine)
     /* initialize possible_cpus */
     mc->possible_cpu_arch_ids(machine);
 
+    s390_topology_setup(machine);
     for (i = 0; i < machine->smp.cpus; i++) {
         s390x_new_cpu(machine->cpu_type, i, &error_fatal);
     }
@@ -306,6 +308,10 @@  static void s390_cpu_plug(HotplugHandler *hotplug_dev,
     g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu);
     ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev);
 
+    if (!s390_topology_new_cpu(ms, cpu->env.core_id, errp)) {
+        return;
+    }
+
     if (dev->hotplugged) {
         raise_irq_cpu_hotplug();
     }
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644
index 0000000000..beec61706c
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,74 @@ 
+/*
+ * CPU Topology
+ *
+ * Copyright 2022 IBM Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define S390_TOPOLOGY_CPU_TYPE    0x03
+
+#define S390_TOPOLOGY_POLARITY_H  0x00
+#define S390_TOPOLOGY_POLARITY_VL 0x01
+#define S390_TOPOLOGY_POLARITY_VM 0x02
+#define S390_TOPOLOGY_POLARITY_VH 0x03
+
+#define TYPE_S390_TOPOLOGY_CORES "topology cores"
+/*
+ * Each CPU inside a socket is represented by a bit in a 64-bit
+ * unsigned long, set on plug and cleared on unplug of a CPU.
+ * All CPUs inside a mask share the same dedicated, polarity and
+ * cputype values.
+ * The origin is the offset of the first CPU in a mask.
+ */
+struct S390TopologyCores {
+    DeviceState parent_obj;
+    int id;
+    bool dedicated;
+    uint8_t polarity;
+    uint8_t cputype;
+    uint16_t origin;
+    uint64_t mask;
+    int cnt;
+};
+typedef struct S390TopologyCores S390TopologyCores;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyCores, S390_TOPOLOGY_CORES)
+
+#define TYPE_S390_TOPOLOGY_SOCKET "topology socket"
+#define TYPE_S390_TOPOLOGY_SOCKET_BUS "socket-bus"
+struct S390TopologySocket {
+    DeviceState parent_obj;
+    BusState *bus;
+    int socket_id;
+    int cnt;
+};
+typedef struct S390TopologySocket S390TopologySocket;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologySocket, S390_TOPOLOGY_SOCKET)
+#define S390_MAX_SOCKETS 4
+
+#define TYPE_S390_TOPOLOGY_BOOK "topology book"
+#define TYPE_S390_TOPOLOGY_BOOK_BUS "book-bus"
+struct S390TopologyBook {
+    SysBusDevice parent_obj;
+    BusState *bus;
+    int book_id;
+    int cnt;
+};
+typedef struct S390TopologyBook S390TopologyBook;
+OBJECT_DECLARE_SIMPLE_TYPE(S390TopologyBook, S390_TOPOLOGY_BOOK)
+#define S390_MAX_BOOKS 1
+
+S390TopologyBook *s390_init_topology(void);
+
+S390TopologyBook *s390_get_topology(void);
+void s390_topology_setup(MachineState *ms);
+bool s390_topology_new_cpu(MachineState *ms, int core_id, Error **errp);
+
+#endif
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7d6d01325b..216adfde26 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -565,6 +565,53 @@  typedef union SysIB {
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+        uint8_t nl;
+        uint8_t reserved0[3];
+        uint8_t reserved1:5;
+        uint8_t dedicated:1;
+        uint8_t polarity:2;
+        uint8_t type;
+        uint16_t origin;
+        uint64_t mask;
+} SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+        uint8_t nl;
+        uint8_t reserved[6];
+        uint8_t id;
+} QEMU_PACKED SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* Generic Topology List Entry */
+typedef union SysIBTl_entry {
+        uint8_t nl;
+        SysIBTl_container container;
+        SysIBTl_cpu cpu;
+} SysIBTl_entry;
+
+#define TOPOLOGY_NR_MAG  6
+#define TOPOLOGY_NR_MAG6 0
+#define TOPOLOGY_NR_MAG5 1
+#define TOPOLOGY_NR_MAG4 2
+#define TOPOLOGY_NR_MAG3 3
+#define TOPOLOGY_NR_MAG2 4
+#define TOPOLOGY_NR_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  res0[2];
+    uint16_t length;
+    uint8_t  mag[TOPOLOGY_NR_MAG];
+    uint8_t  res1;
+    uint8_t  mnest;
+    uint32_t res2;
+    SysIBTl_entry tle[0];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 /* MMU defines */
 #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
 #define ASCE_SUBSPACE         0x200       /* subspace group control           */