Message ID | 20240108082727.420817-11-zhao1.liu@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support smp.clusters for x86 in QEMU | expand |
On 1/8/2024 4:27 PM, Zhao Liu wrote: > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > Introduce cluster-id other than module-id to be consistent with > CpuInstanceProperties.cluster-id, and this avoids the confusion > of parameter names when hotplugging. I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. It introduces confusion around the code. s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also add a module level for x86 instead of reusing cluster. (This is also what I want to reply to the cover letter.) [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ > Following the legacy smp check rules, also add the cluster_id validity > into x86_cpu_pre_plug(). > > Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com> > Co-developed-by: Zhao Liu <zhao1.liu@intel.com> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > Tested-by: Babu Moger <babu.moger@amd.com> > Tested-by: Yongwei Ma <yongwei.ma@intel.com> > Acked-by: Michael S. Tsirkin <mst@redhat.com> > --- > Changes since v6: > * Update the comment when check cluster-id. Since there's no > v8.2, the cluster-id support should at least start from v9.0. > > Changes since v5: > * Update the comment when check cluster-id. Since current QEMU is > v8.2, the cluster-id support should at least start from v8.3. > > Changes since v3: > * Use the imperative in the commit message. (Babu) > --- > hw/i386/x86.c | 33 +++++++++++++++++++++++++-------- > target/i386/cpu.c | 2 ++ > target/i386/cpu.h | 1 + > 3 files changed, 28 insertions(+), 8 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 5269aae3a5c2..1c1d368614ee 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -329,6 +329,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > cpu->die_id = 0; > } > > + /* > + * cluster-id was optional in QEMU 9.0 and older, so keep it optional > + * if there's only one cluster per die. > + */ > + if (cpu->cluster_id < 0 && ms->smp.clusters == 1) { > + cpu->cluster_id = 0; > + } > + > if (cpu->socket_id < 0) { > error_setg(errp, "CPU socket-id is not set"); > return; > @@ -345,6 +353,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > cpu->die_id, ms->smp.dies - 1); > return; > } > + if (cpu->cluster_id < 0) { > + error_setg(errp, "CPU cluster-id is not set"); > + return; > + } else if (cpu->cluster_id > ms->smp.clusters - 1) { > + error_setg(errp, "Invalid CPU cluster-id: %u must be in range 0:%u", > + cpu->cluster_id, ms->smp.clusters - 1); > + return; > + } > if (cpu->core_id < 0) { > error_setg(errp, "CPU core-id is not set"); > return; > @@ -364,16 +380,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > > topo_ids.pkg_id = cpu->socket_id; > topo_ids.die_id = cpu->die_id; > + topo_ids.module_id = cpu->cluster_id; > topo_ids.core_id = cpu->core_id; > topo_ids.smt_id = cpu->thread_id; > - > - /* > - * TODO: This is the temporary initialization for topo_ids.module_id to > - * avoid "maybe-uninitialized" compilation errors. Will remove when > - * X86CPU supports cluster_id. > - */ > - topo_ids.module_id = 0; > - > cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); > } > > @@ -418,6 +427,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > } > cpu->die_id = topo_ids.die_id; > > + if (cpu->cluster_id != -1 && cpu->cluster_id != topo_ids.module_id) { > + error_setg(errp, "property cluster-id: %u doesn't match set apic-id:" > + " 0x%x (cluster-id: %u)", cpu->cluster_id, cpu->apic_id, > + topo_ids.module_id); > + return; > + } > + cpu->cluster_id = topo_ids.module_id; > + > if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { > error_setg(errp, "property core-id: %u doesn't match set apic-id:" > " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index a2d39d2198b6..498a4be62b40 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -7909,12 +7909,14 @@ static Property x86_cpu_properties[] = { > DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), > DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), > DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), > + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, 0), > DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), > DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), > #else > DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), > DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), > DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), > + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, -1), > DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), > DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), > #endif > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index 97b290e10576..009950b87203 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -2057,6 +2057,7 @@ struct ArchCPU { > int32_t node_id; /* NUMA node this CPU belongs to */ > int32_t socket_id; > int32_t die_id; > + int32_t cluster_id; > int32_t core_id; > int32_t thread_id; >
On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: > Date: Sun, 14 Jan 2024 21:49:18 +0800 > From: Xiaoyao Li <xiaoyao.li@intel.com> > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > On 1/8/2024 4:27 PM, Zhao Liu wrote: > > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > > > Introduce cluster-id other than module-id to be consistent with > > CpuInstanceProperties.cluster-id, and this avoids the confusion > > of parameter names when hotplugging. > > I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. > It introduces confusion around the code. There is a precedent: generic "socket" v.s. i386 "package". The direct definition of cluster is the level that is above the "core" and shares the hardware resources including L2. In this sense, arm's cluster is the same as x86's module. Though different arches have different naming styles, but QEMU's generic code still need the uniform topology hierarchy. > > s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also > add a module level for x86 instead of reusing cluster. > > (This is also what I want to reply to the cover letter.) > > [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ These two new levels have the clear topological hierarchy relationship and don't duplicate existing ones. "book" or "drawer" may correspond to intel's "cluster". Maybe, in the future, we could support for arch-specific alias topologies in -smp. Thanks, Zhao > > > Following the legacy smp check rules, also add the cluster_id validity > > into x86_cpu_pre_plug(). > > > > Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com> > > Co-developed-by: Zhao Liu <zhao1.liu@intel.com> > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com> > > Tested-by: Babu Moger <babu.moger@amd.com> > > Tested-by: Yongwei Ma <yongwei.ma@intel.com> > > Acked-by: Michael S. Tsirkin <mst@redhat.com> > > --- > > Changes since v6: > > * Update the comment when check cluster-id. Since there's no > > v8.2, the cluster-id support should at least start from v9.0. > > > > Changes since v5: > > * Update the comment when check cluster-id. Since current QEMU is > > v8.2, the cluster-id support should at least start from v8.3. > > > > Changes since v3: > > * Use the imperative in the commit message. (Babu) > > --- > > hw/i386/x86.c | 33 +++++++++++++++++++++++++-------- > > target/i386/cpu.c | 2 ++ > > target/i386/cpu.h | 1 + > > 3 files changed, 28 insertions(+), 8 deletions(-) > > > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > > index 5269aae3a5c2..1c1d368614ee 100644 > > --- a/hw/i386/x86.c > > +++ b/hw/i386/x86.c > > @@ -329,6 +329,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > > cpu->die_id = 0; > > } > > + /* > > + * cluster-id was optional in QEMU 9.0 and older, so keep it optional > > + * if there's only one cluster per die. > > + */ > > + if (cpu->cluster_id < 0 && ms->smp.clusters == 1) { > > + cpu->cluster_id = 0; > > + } > > + > > if (cpu->socket_id < 0) { > > error_setg(errp, "CPU socket-id is not set"); > > return; > > @@ -345,6 +353,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > > cpu->die_id, ms->smp.dies - 1); > > return; > > } > > + if (cpu->cluster_id < 0) { > > + error_setg(errp, "CPU cluster-id is not set"); > > + return; > > + } else if (cpu->cluster_id > ms->smp.clusters - 1) { > > + error_setg(errp, "Invalid CPU cluster-id: %u must be in range 0:%u", > > + cpu->cluster_id, ms->smp.clusters - 1); > > + return; > > + } > > if (cpu->core_id < 0) { > > error_setg(errp, "CPU core-id is not set"); > > return; > > @@ -364,16 +380,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > > topo_ids.pkg_id = cpu->socket_id; > > topo_ids.die_id = cpu->die_id; > > + topo_ids.module_id = cpu->cluster_id; > > topo_ids.core_id = cpu->core_id; > > topo_ids.smt_id = cpu->thread_id; > > - > > - /* > > - * TODO: This is the temporary initialization for topo_ids.module_id to > > - * avoid "maybe-uninitialized" compilation errors. Will remove when > > - * X86CPU supports cluster_id. > > - */ > > - topo_ids.module_id = 0; > > - > > cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); > > } > > @@ -418,6 +427,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, > > } > > cpu->die_id = topo_ids.die_id; > > + if (cpu->cluster_id != -1 && cpu->cluster_id != topo_ids.module_id) { > > + error_setg(errp, "property cluster-id: %u doesn't match set apic-id:" > > + " 0x%x (cluster-id: %u)", cpu->cluster_id, cpu->apic_id, > > + topo_ids.module_id); > > + return; > > + } > > + cpu->cluster_id = topo_ids.module_id; > > + > > if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { > > error_setg(errp, "property core-id: %u doesn't match set apic-id:" > > " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > > index a2d39d2198b6..498a4be62b40 100644 > > --- a/target/i386/cpu.c > > +++ b/target/i386/cpu.c > > @@ -7909,12 +7909,14 @@ static Property x86_cpu_properties[] = { > > DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), > > DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), > > DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), > > + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, 0), > > DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), > > DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), > > #else > > DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), > > DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), > > DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), > > + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, -1), > > DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), > > DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), > > #endif > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > > index 97b290e10576..009950b87203 100644 > > --- a/target/i386/cpu.h > > +++ b/target/i386/cpu.h > > @@ -2057,6 +2057,7 @@ struct ArchCPU { > > int32_t node_id; /* NUMA node this CPU belongs to */ > > int32_t socket_id; > > int32_t die_id; > > + int32_t cluster_id; > > int32_t core_id; > > int32_t thread_id; >
On 1/15/2024 11:27 AM, Zhao Liu wrote: > On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: >> Date: Sun, 14 Jan 2024 21:49:18 +0800 >> From: Xiaoyao Li <xiaoyao.li@intel.com> >> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >> >> On 1/8/2024 4:27 PM, Zhao Liu wrote: >>> From: Zhuocheng Ding <zhuocheng.ding@intel.com> >>> >>> Introduce cluster-id other than module-id to be consistent with >>> CpuInstanceProperties.cluster-id, and this avoids the confusion >>> of parameter names when hotplugging. >> >> I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. >> It introduces confusion around the code. > > There is a precedent: generic "socket" v.s. i386 "package". It's not the same thing. "socket" vs "package" is just software people and hardware people chose different name. It's just different naming issue. however, here it's reusing name issue while 'cluster' has been defined for x86. It does introduce confusion. > The direct definition of cluster is the level that is above the "core" > and shares the hardware resources including L2. In this sense, arm's > cluster is the same as x86's module. then, what about intel implements tile level in the future? why ARM's 'cluster' is mapped to 'module', but not 'tile' ? reusing 'cluster' for 'module' is just a bad idea. > Though different arches have different naming styles, but QEMU's generic > code still need the uniform topology hierarchy. generic code can provide as many topology levels as it can. each ARCH can choose to use the ones it supports. e.g., in qapi/machine.json, it says, # The ordering from highest/coarsest to lowest/finest is: # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. # # Different architectures support different subsets of topology # containers. # # For example, s390x does not have clusters and dies, and the socket # is the parent container of cores. we can update it to # The ordering from highest/coarsest to lowest/finest is: # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, # @threads. # # Different architectures support different subsets of topology # containers. # # For example, s390x does not have clusters and dies, and the socket # is the parent container of cores. # # For example, x86 does not have drawers and books, and does not support # cluster. even if cluster of x86 is supported someday in the future, we can remove the ordering requirement from above description. >> >> s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also >> add a module level for x86 instead of reusing cluster. >> >> (This is also what I want to reply to the cover letter.) >> >> [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ > > These two new levels have the clear topological hierarchy relationship > and don't duplicate existing ones. > > "book" or "drawer" may correspond to intel's "cluster". > > Maybe, in the future, we could support for arch-specific alias topologies > in -smp. I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any benefit except avoid adding a new field in SMPconfiguration. All the other cluster code is ARM specific and x86 cannot share. I don't think it's a problem to add 'module' to SMPconfiguration. > Thanks, > Zhao > >> >>> Following the legacy smp check rules, also add the cluster_id validity >>> into x86_cpu_pre_plug(). >>> >>> Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com> >>> Co-developed-by: Zhao Liu <zhao1.liu@intel.com> >>> Signed-off-by: Zhao Liu <zhao1.liu@intel.com> >>> Tested-by: Babu Moger <babu.moger@amd.com> >>> Tested-by: Yongwei Ma <yongwei.ma@intel.com> >>> Acked-by: Michael S. Tsirkin <mst@redhat.com> >>> --- >>> Changes since v6: >>> * Update the comment when check cluster-id. Since there's no >>> v8.2, the cluster-id support should at least start from v9.0. >>> >>> Changes since v5: >>> * Update the comment when check cluster-id. Since current QEMU is >>> v8.2, the cluster-id support should at least start from v8.3. >>> >>> Changes since v3: >>> * Use the imperative in the commit message. (Babu) >>> --- >>> hw/i386/x86.c | 33 +++++++++++++++++++++++++-------- >>> target/i386/cpu.c | 2 ++ >>> target/i386/cpu.h | 1 + >>> 3 files changed, 28 insertions(+), 8 deletions(-) >>> >>> diff --git a/hw/i386/x86.c b/hw/i386/x86.c >>> index 5269aae3a5c2..1c1d368614ee 100644 >>> --- a/hw/i386/x86.c >>> +++ b/hw/i386/x86.c >>> @@ -329,6 +329,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, >>> cpu->die_id = 0; >>> } >>> + /* >>> + * cluster-id was optional in QEMU 9.0 and older, so keep it optional >>> + * if there's only one cluster per die. >>> + */ >>> + if (cpu->cluster_id < 0 && ms->smp.clusters == 1) { >>> + cpu->cluster_id = 0; >>> + } >>> + >>> if (cpu->socket_id < 0) { >>> error_setg(errp, "CPU socket-id is not set"); >>> return; >>> @@ -345,6 +353,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, >>> cpu->die_id, ms->smp.dies - 1); >>> return; >>> } >>> + if (cpu->cluster_id < 0) { >>> + error_setg(errp, "CPU cluster-id is not set"); >>> + return; >>> + } else if (cpu->cluster_id > ms->smp.clusters - 1) { >>> + error_setg(errp, "Invalid CPU cluster-id: %u must be in range 0:%u", >>> + cpu->cluster_id, ms->smp.clusters - 1); >>> + return; >>> + } >>> if (cpu->core_id < 0) { >>> error_setg(errp, "CPU core-id is not set"); >>> return; >>> @@ -364,16 +380,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, >>> topo_ids.pkg_id = cpu->socket_id; >>> topo_ids.die_id = cpu->die_id; >>> + topo_ids.module_id = cpu->cluster_id; >>> topo_ids.core_id = cpu->core_id; >>> topo_ids.smt_id = cpu->thread_id; >>> - >>> - /* >>> - * TODO: This is the temporary initialization for topo_ids.module_id to >>> - * avoid "maybe-uninitialized" compilation errors. Will remove when >>> - * X86CPU supports cluster_id. >>> - */ >>> - topo_ids.module_id = 0; >>> - >>> cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); >>> } >>> @@ -418,6 +427,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, >>> } >>> cpu->die_id = topo_ids.die_id; >>> + if (cpu->cluster_id != -1 && cpu->cluster_id != topo_ids.module_id) { >>> + error_setg(errp, "property cluster-id: %u doesn't match set apic-id:" >>> + " 0x%x (cluster-id: %u)", cpu->cluster_id, cpu->apic_id, >>> + topo_ids.module_id); >>> + return; >>> + } >>> + cpu->cluster_id = topo_ids.module_id; >>> + >>> if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { >>> error_setg(errp, "property core-id: %u doesn't match set apic-id:" >>> " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, >>> diff --git a/target/i386/cpu.c b/target/i386/cpu.c >>> index a2d39d2198b6..498a4be62b40 100644 >>> --- a/target/i386/cpu.c >>> +++ b/target/i386/cpu.c >>> @@ -7909,12 +7909,14 @@ static Property x86_cpu_properties[] = { >>> DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), >>> DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), >>> DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), >>> + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, 0), >>> DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), >>> DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), >>> #else >>> DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), >>> DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), >>> DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), >>> + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, -1), >>> DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), >>> DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), >>> #endif >>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h >>> index 97b290e10576..009950b87203 100644 >>> --- a/target/i386/cpu.h >>> +++ b/target/i386/cpu.h >>> @@ -2057,6 +2057,7 @@ struct ArchCPU { >>> int32_t node_id; /* NUMA node this CPU belongs to */ >>> int32_t socket_id; >>> int32_t die_id; >>> + int32_t cluster_id; >>> int32_t core_id; >>> int32_t thread_id; >>
(Also cc "machine core" maintainers.) Hi Xiaoyao, On Mon, Jan 15, 2024 at 12:18:17PM +0800, Xiaoyao Li wrote: > Date: Mon, 15 Jan 2024 12:18:17 +0800 > From: Xiaoyao Li <xiaoyao.li@intel.com> > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > On 1/15/2024 11:27 AM, Zhao Liu wrote: > > On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: > > > Date: Sun, 14 Jan 2024 21:49:18 +0800 > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > On 1/8/2024 4:27 PM, Zhao Liu wrote: > > > > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > > > > > > > Introduce cluster-id other than module-id to be consistent with > > > > CpuInstanceProperties.cluster-id, and this avoids the confusion > > > > of parameter names when hotplugging. > > > > > > I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. > > > It introduces confusion around the code. > > > > There is a precedent: generic "socket" v.s. i386 "package". > > It's not the same thing. "socket" vs "package" is just software people and > hardware people chose different name. It's just different naming issue. No, it's a similar issue. Same physical device, different name only. Furthermore, the topology was introduced for resource layout and silicon fabrication, and similar design ideas and fabrication processes are fairly consistent across common current arches. Therefore, it is possible to abstract similar topological hierarchies for different arches. > > however, here it's reusing name issue while 'cluster' has been defined for > x86. It does introduce confusion. There's nothing fundamentally different between the x86 module and the generic cluster, is there? This is the reason that I don't agree with introducing "modules" in -smp. > > > The direct definition of cluster is the level that is above the "core" > > and shares the hardware resources including L2. In this sense, arm's > > cluster is the same as x86's module. > > then, what about intel implements tile level in the future? why ARM's > 'cluster' is mapped to 'module', but not 'tile' ? This depends on the actual need. Module (for x86) and cluster (in general) are similar, and tile (for x86) is used for L3 in practice, so I use module rather than tile to map generic clusters. And, it should be noted that x86 module is mapped to the generic cluster, not to ARM's. It's just that currently only ARM is using the clusters option in -smp. I believe QEMU provides the abstract and unified topology hierarchies in -smp, not the arch-specific hierarchies. > > reusing 'cluster' for 'module' is just a bad idea. > > > Though different arches have different naming styles, but QEMU's generic > > code still need the uniform topology hierarchy. > > generic code can provide as many topology levels as it can. each ARCH can > choose to use the ones it supports. > > e.g., > > in qapi/machine.json, it says, > > # The ordering from highest/coarsest to lowest/finest is: > # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. This ordering is well-defined... > # > # Different architectures support different subsets of topology > # containers. > # > # For example, s390x does not have clusters and dies, and the socket > # is the parent container of cores. > > we can update it to > > # The ordering from highest/coarsest to lowest/finest is: > # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, > # @threads. ...but here it's impossible to figure out why cluster is above module, and even I can't come up with the difference between cluster and module. > # > # Different architectures support different subsets of topology > # containers. > # > # For example, s390x does not have clusters and dies, and the socket > # is the parent container of cores. > # > # For example, x86 does not have drawers and books, and does not support > # cluster. > > even if cluster of x86 is supported someday in the future, we can remove the > ordering requirement from above description. x86's cluster is above the package. To reserve this name for x86, we can't have the well-defined topology ordering. But topology ordering is necessary in generic code, and many calculations depend on the topology ordering. > > > > > > > s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also > > > add a module level for x86 instead of reusing cluster. > > > > > > (This is also what I want to reply to the cover letter.) > > > > > > [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ > > > > These two new levels have the clear topological hierarchy relationship > > and don't duplicate existing ones. > > > > "book" or "drawer" may correspond to intel's "cluster". > > > > Maybe, in the future, we could support for arch-specific alias topologies > > in -smp. > > I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any > benefit except avoid adding a new field in SMPconfiguration. All the other > cluster code is ARM specific and x86 cannot share. The point is that there is no difference between intel module and general cluster...Considering only the naming issue, even AMD has the "complex" to correspond to the Intel's "module". > > I don't think it's a problem to add 'module' to SMPconfiguration. Adding an option is simple, but however, it is not conducive to the topology maintenance of QEMU, reusing the existing generic structure should be the first consideration except when the new level is fundamentally different. Thanks, Zhao
On 1/15/2024 1:59 PM, Zhao Liu wrote: > (Also cc "machine core" maintainers.) > > Hi Xiaoyao, > > On Mon, Jan 15, 2024 at 12:18:17PM +0800, Xiaoyao Li wrote: >> Date: Mon, 15 Jan 2024 12:18:17 +0800 >> From: Xiaoyao Li <xiaoyao.li@intel.com> >> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >> >> On 1/15/2024 11:27 AM, Zhao Liu wrote: >>> On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: >>>> Date: Sun, 14 Jan 2024 21:49:18 +0800 >>>> From: Xiaoyao Li <xiaoyao.li@intel.com> >>>> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >>>> >>>> On 1/8/2024 4:27 PM, Zhao Liu wrote: >>>>> From: Zhuocheng Ding <zhuocheng.ding@intel.com> >>>>> >>>>> Introduce cluster-id other than module-id to be consistent with >>>>> CpuInstanceProperties.cluster-id, and this avoids the confusion >>>>> of parameter names when hotplugging. >>>> >>>> I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. >>>> It introduces confusion around the code. >>> >>> There is a precedent: generic "socket" v.s. i386 "package". >> >> It's not the same thing. "socket" vs "package" is just software people and >> hardware people chose different name. It's just different naming issue. > > No, it's a similar issue. Same physical device, different name only. > > Furthermore, the topology was introduced for resource layout and silicon > fabrication, and similar design ideas and fabrication processes are fairly > consistent across common current arches. Therefore, it is possible to > abstract similar topological hierarchies for different arches. > >> >> however, here it's reusing name issue while 'cluster' has been defined for >> x86. It does introduce confusion. > > There's nothing fundamentally different between the x86 module and the > generic cluster, is there? This is the reason that I don't agree with > introducing "modules" in -smp. generic cluster just means the cluster of processors, i.e, a group of cpus/lps. It is just a middle level between die and core. It can be the module level in intel, or tile level. Further, if per die lp number increases in the future, there might be more middle levels in intel between die and core. Then at that time, how to decide what level should cluster be mapped to? >> >>> The direct definition of cluster is the level that is above the "core" >>> and shares the hardware resources including L2. In this sense, arm's >>> cluster is the same as x86's module. >> >> then, what about intel implements tile level in the future? why ARM's >> 'cluster' is mapped to 'module', but not 'tile' ? > > This depends on the actual need. > > Module (for x86) and cluster (in general) are similar, and tile (for x86) > is used for L3 in practice, so I use module rather than tile to map > generic cluster. > > And, it should be noted that x86 module is mapped to the generic cluster, > not to ARM's. It's just that currently only ARM is using the clusters > option in -smp. > > I believe QEMU provides the abstract and unified topology hierarchies in > -smp, not the arch-specific hierarchies. > >> >> reusing 'cluster' for 'module' is just a bad idea. >> >>> Though different arches have different naming styles, but QEMU's generic >>> code still need the uniform topology hierarchy. >> >> generic code can provide as many topology levels as it can. each ARCH can >> choose to use the ones it supports. >> >> e.g., >> >> in qapi/machine.json, it says, >> >> # The ordering from highest/coarsest to lowest/finest is: >> # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. > > This ordering is well-defined... > >> # >> # Different architectures support different subsets of topology >> # containers. >> # >> # For example, s390x does not have clusters and dies, and the socket >> # is the parent container of cores. >> >> we can update it to >> >> # The ordering from highest/coarsest to lowest/finest is: >> # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, >> # @threads. > > ...but here it's impossible to figure out why cluster is above module, > and even I can't come up with the difference between cluster and module. > >> # >> # Different architectures support different subsets of topology >> # containers. >> # >> # For example, s390x does not have clusters and dies, and the socket >> # is the parent container of cores. >> # >> # For example, x86 does not have drawers and books, and does not support >> # cluster. >> >> even if cluster of x86 is supported someday in the future, we can remove the >> ordering requirement from above description. > > x86's cluster is above the package. > > To reserve this name for x86, we can't have the well-defined topology > ordering. > > But topology ordering is necessary in generic code, and many > calculations depend on the topology ordering. could you point me to the code? >> >>>> >>>> s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also >>>> add a module level for x86 instead of reusing cluster. >>>> >>>> (This is also what I want to reply to the cover letter.) >>>> >>>> [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ >>> >>> These two new levels have the clear topological hierarchy relationship >>> and don't duplicate existing ones. >>> >>> "book" or "drawer" may correspond to intel's "cluster". >>> >>> Maybe, in the future, we could support for arch-specific alias topologies >>> in -smp. >> >> I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any >> benefit except avoid adding a new field in SMPconfiguration. All the other >> cluster code is ARM specific and x86 cannot share. > > The point is that there is no difference between intel module and general > cluster...Considering only the naming issue, even AMD has the "complex" to > correspond to the Intel's "module". does complex of AMD really match with intel module? L3 cache is shared in one complex, while L2 cache is shared in one module for now. >> >> I don't think it's a problem to add 'module' to SMPconfiguration. > > Adding an option is simple, but however, it is not conducive to the > topology maintenance of QEMU, reusing the existing generic structure > should be the first consideration except when the new level is > fundamentally different. > > Thanks, > Zhao >
Hi Xiaoyao, On Mon, Jan 15, 2024 at 03:45:58PM +0800, Xiaoyao Li wrote: > Date: Mon, 15 Jan 2024 15:45:58 +0800 > From: Xiaoyao Li <xiaoyao.li@intel.com> > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > On 1/15/2024 1:59 PM, Zhao Liu wrote: > > (Also cc "machine core" maintainers.) > > > > Hi Xiaoyao, > > > > On Mon, Jan 15, 2024 at 12:18:17PM +0800, Xiaoyao Li wrote: > > > Date: Mon, 15 Jan 2024 12:18:17 +0800 > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > On 1/15/2024 11:27 AM, Zhao Liu wrote: > > > > On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: > > > > > Date: Sun, 14 Jan 2024 21:49:18 +0800 > > > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > > > > > On 1/8/2024 4:27 PM, Zhao Liu wrote: > > > > > > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > > > > > > > > > > > Introduce cluster-id other than module-id to be consistent with > > > > > > CpuInstanceProperties.cluster-id, and this avoids the confusion > > > > > > of parameter names when hotplugging. > > > > > > > > > > I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. > > > > > It introduces confusion around the code. > > > > > > > > There is a precedent: generic "socket" v.s. i386 "package". > > > > > > It's not the same thing. "socket" vs "package" is just software people and > > > hardware people chose different name. It's just different naming issue. > > > > No, it's a similar issue. Same physical device, different name only. > > > > Furthermore, the topology was introduced for resource layout and silicon > > fabrication, and similar design ideas and fabrication processes are fairly > > consistent across common current arches. Therefore, it is possible to > > abstract similar topological hierarchies for different arches. > > > > > > > > however, here it's reusing name issue while 'cluster' has been defined for > > > x86. It does introduce confusion. > > > > There's nothing fundamentally different between the x86 module and the > > generic cluster, is there? This is the reason that I don't agree with > > introducing "modules" in -smp. > > generic cluster just means the cluster of processors, i.e, a group of > cpus/lps. It is just a middle level between die and core. Not sure if you mean the "cluster" device for TCG GDB? "cluster" device is different with "cluster" option in -smp. When Yanan introduced the "cluster" option in -smp, he mentioned that it is for sharing L2 and L3 tags, which roughly corresponds to our module. > > It can be the module level in intel, or tile level. Further, if per die lp > number increases in the future, there might be more middle levels in intel > between die and core. Then at that time, how to decide what level should > cluster be mapped to? Currently, there're 3 levels defined in SDM which are between die and core: diegrp, tile and module. In our products, L2 is just sharing on the module, so the intel's module and the general cluster are the best match. There are no commercially available machines for the other levels yet, so there's no way to ensure exactly what the future holds, but we should try to avoid fragmentation of the topology hierarchy and try to maintain the uniform and common topology hierarchies for QEMU. Unless a new level for -smp is introduced in the future when an unsolvable problem is raised. > > > > > > > > The direct definition of cluster is the level that is above the "core" > > > > and shares the hardware resources including L2. In this sense, arm's > > > > cluster is the same as x86's module. > > > > > > then, what about intel implements tile level in the future? why ARM's > > > 'cluster' is mapped to 'module', but not 'tile' ? > > > > This depends on the actual need. > > > > Module (for x86) and cluster (in general) are similar, and tile (for x86) > > is used for L3 in practice, so I use module rather than tile to map > > generic cluster. > > > > And, it should be noted that x86 module is mapped to the generic cluster, > > not to ARM's. It's just that currently only ARM is using the clusters > > option in -smp. > > > > I believe QEMU provides the abstract and unified topology hierarchies in > > -smp, not the arch-specific hierarchies. > > > > > > > > reusing 'cluster' for 'module' is just a bad idea. > > > > > > > Though different arches have different naming styles, but QEMU's generic > > > > code still need the uniform topology hierarchy. > > > > > > generic code can provide as many topology levels as it can. each ARCH can > > > choose to use the ones it supports. > > > > > > e.g., > > > > > > in qapi/machine.json, it says, > > > > > > # The ordering from highest/coarsest to lowest/finest is: > > > # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. > > > > This ordering is well-defined... > > > > > # > > > # Different architectures support different subsets of topology > > > # containers. > > > # > > > # For example, s390x does not have clusters and dies, and the socket > > > # is the parent container of cores. > > > > > > we can update it to > > > > > > # The ordering from highest/coarsest to lowest/finest is: > > > # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, > > > # @threads. > > > > ...but here it's impossible to figure out why cluster is above module, > > and even I can't come up with the difference between cluster and module. > > > > > # > > > # Different architectures support different subsets of topology > > > # containers. > > > # > > > # For example, s390x does not have clusters and dies, and the socket > > > # is the parent container of cores. > > > # > > > # For example, x86 does not have drawers and books, and does not support > > > # cluster. > > > > > > even if cluster of x86 is supported someday in the future, we can remove the > > > ordering requirement from above description. > > > > x86's cluster is above the package. > > > > To reserve this name for x86, we can't have the well-defined topology > > ordering. > > > > But topology ordering is necessary in generic code, and many > > calculations depend on the topology ordering. > > could you point me to the code? Yes, e.g., there're 2 helpers: machine_topo_get_cores_per_socket() and machine_topo_get_threads_per_socket(). > > > > > > > > > > > > > > s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also > > > > > add a module level for x86 instead of reusing cluster. > > > > > > > > > > (This is also what I want to reply to the cover letter.) > > > > > > > > > > [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ > > > > > > > > These two new levels have the clear topological hierarchy relationship > > > > and don't duplicate existing ones. > > > > > > > > "book" or "drawer" may correspond to intel's "cluster". > > > > > > > > Maybe, in the future, we could support for arch-specific alias topologies > > > > in -smp. > > > > > > I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any > > > benefit except avoid adding a new field in SMPconfiguration. All the other > > > cluster code is ARM specific and x86 cannot share. > > > > The point is that there is no difference between intel module and general > > cluster...Considering only the naming issue, even AMD has the "complex" to > > correspond to the Intel's "module". > > does complex of AMD really match with intel module? L3 cache is shared in > one complex, while L2 cache is shared in one module for now. If then it could correspond to intel's tile, which is after all a level below die. Thanks, Zhao
On 1/15/2024 11:18 PM, Zhao Liu wrote: > Hi Xiaoyao, > > On Mon, Jan 15, 2024 at 03:45:58PM +0800, Xiaoyao Li wrote: >> Date: Mon, 15 Jan 2024 15:45:58 +0800 >> From: Xiaoyao Li <xiaoyao.li@intel.com> >> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >> >> On 1/15/2024 1:59 PM, Zhao Liu wrote: >>> (Also cc "machine core" maintainers.) >>>u >>> Hi Xiaoyao, >>> >>> On Mon, Jan 15, 2024 at 12:18:17PM +0800, Xiaoyao Li wrote: >>>> Date: Mon, 15 Jan 2024 12:18:17 +0800 >>>> From: Xiaoyao Li <xiaoyao.li@intel.com> >>>> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >>>> >>>> On 1/15/2024 11:27 AM, Zhao Liu wrote: >>>>> On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: >>>>>> Date: Sun, 14 Jan 2024 21:49:18 +0800 >>>>>> From: Xiaoyao Li <xiaoyao.li@intel.com> >>>>>> Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU >>>>>> >>>>>> On 1/8/2024 4:27 PM, Zhao Liu wrote: >>>>>>> From: Zhuocheng Ding <zhuocheng.ding@intel.com> >>>>>>> >>>>>>> Introduce cluster-id other than module-id to be consistent with >>>>>>> CpuInstanceProperties.cluster-id, and this avoids the confusion >>>>>>> of parameter names when hotplugging. >>>>>> >>>>>> I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. >>>>>> It introduces confusion around the code. >>>>> >>>>> There is a precedent: generic "socket" v.s. i386 "package". >>>> >>>> It's not the same thing. "socket" vs "package" is just software people and >>>> hardware people chose different name. It's just different naming issue. >>> >>> No, it's a similar issue. Same physical device, different name only. >>> >>> Furthermore, the topology was introduced for resource layout and silicon >>> fabrication, and similar design ideas and fabrication processes are fairly >>> consistent across common current arches. Therefore, it is possible to >>> abstract similar topological hierarchies for different arches. >>> >>>> >>>> however, here it's reusing name issue while 'cluster' has been defined for >>>> x86. It does introduce confusion. >>> >>> There's nothing fundamentally different between the x86 module and the >>> generic cluster, is there? This is the reason that I don't agree with >>> introducing "modules" in -smp. >> >> generic cluster just means the cluster of processors, i.e, a group of >> cpus/lps. It is just a middle level between die and core. > > Not sure if you mean the "cluster" device for TCG GDB? "cluster" device > is different with "cluster" option in -smp. No, I just mean the word 'cluster'. And I thought what you called "generic cluster" means "a cluster of logical processors" Below I quote the description of Yanan's commit 864c3b5c32f0: A cluster generally means a group of CPU cores which share L2 cache or other mid-level resources, and it is the shared resources that is used to improve scheduler's behavior. From the point of view of the size range, it's between CPU die and CPU core. For example, on some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, and 4 CPU cores in each cluster. The 4 CPU cores share a separate L2 cache and a L3 cache tag, which brings cache affinity advantage. What I get from it, is, cluster is just a middle level between CPU die and CPU core. The cpu cores inside one cluster shares some mid-level resource. L2 cache is just one example of the shared mid-level resource. So it can be either module level, or tile level in x86, or even the diegrp level you mentioned below. > When Yanan introduced the "cluster" option in -smp, he mentioned that it > is for sharing L2 and L3 tags, which roughly corresponds to our module. > >> >> It can be the module level in intel, or tile level. Further, if per die lp >> number increases in the future, there might be more middle levels in intel >> between die and core. Then at that time, how to decide what level should >> cluster be mapped to? > > Currently, there're 3 levels defined in SDM which are between die and > core: diegrp, tile and module. In our products, L2 is just sharing on the > module, so the intel's module and the general cluster are the best match. you said 'generic cluster' a lot of times. But from my point of view, you are referring to current ARM's cluster instead of *generic* cluster. Anyway, cluster is just a mid-level between die and core. We should not associate it any specific resource. A resource is shared in what level can change, e.g., initially L3 cache is shared in a physical package. When multi-die got supported, L3 cache is shared in one die. Now, on AMD product, L3 cache is shared in one complex, and one die can have 2 complexs thus 2 separate L3 cache in one die. It doesn't matter calling it cluster, or module, or xyz. It is just a name to represent a cpu topology level between die and core. What matters is, once it gets accepted, it becomes formal ABI for users that 'cluster' means 'module' for x86 users. This is definitely a big confusion for people. Maybe people try to figure out why, and find the reason is that 'cluster' means the level at which L2 cache is shared and that's just the module level in x86 shares L2 cache. Maybe in the future, "L2 is shared in module" get changed just like the example I give for L3 above. Then, that's really the big confusion, and all this become the "historic reason" that cluster is chosen to represent module in x86. > There are no commercially available machines for the other levels yet, > so there's no way to ensure exactly what the future holds, but we should > try to avoid fragmentation of the topology hierarchy and try to maintain > the uniform and common topology hierarchies for QEMU. > > Unless a new level for -smp is introduced in the future when an unsolvable > problem is raised. > >> >>>> >>>>> The direct definition of cluster is the level that is above the "core" >>>>> and shares the hardware resources including L2. In this sense, arm's >>>>> cluster is the same as x86's module. >>>> >>>> then, what about intel implements tile level in the future? why ARM's >>>> 'cluster' is mapped to 'module', but not 'tile' ? >>> >>> This depends on the actual need. >>> >>> Module (for x86) and cluster (in general) are similar, and tile (for x86) >>> is used for L3 in practice, so I use module rather than tile to map >>> generic cluster. >>> >>> And, it should be noted that x86 module is mapped to the generic cluster, >>> not to ARM's. It's just that currently only ARM is using the clusters >>> option in -smp. >>> >>> I believe QEMU provides the abstract and unified topology hierarchies in >>> -smp, not the arch-specific hierarchies. >>> >>>> >>>> reusing 'cluster' for 'module' is just a bad idea. >>>> >>>>> Though different arches have different naming styles, but QEMU's generic >>>>> code still need the uniform topology hierarchy. >>>> >>>> generic code can provide as many topology levels as it can. each ARCH can >>>> choose to use the ones it supports. >>>> >>>> e.g., >>>> >>>> in qapi/machine.json, it says, >>>> >>>> # The ordering from highest/coarsest to lowest/finest is: >>>> # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. >>> >>> This ordering is well-defined... >>> >>>> # >>>> # Different architectures support different subsets of topology >>>> # containers. >>>> # >>>> # For example, s390x does not have clusters and dies, and the socket >>>> # is the parent container of cores. >>>> >>>> we can update it to >>>> >>>> # The ordering from highest/coarsest to lowest/finest is: >>>> # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, >>>> # @threads. >>> >>> ...but here it's impossible to figure out why cluster is above module, >>> and even I can't come up with the difference between cluster and module. >>> >>>> # >>>> # Different architectures support different subsets of topology >>>> # containers. >>>> # >>>> # For example, s390x does not have clusters and dies, and the socket >>>> # is the parent container of cores. >>>> # >>>> # For example, x86 does not have drawers and books, and does not support >>>> # cluster. >>>> >>>> even if cluster of x86 is supported someday in the future, we can remove the >>>> ordering requirement from above description. >>> >>> x86's cluster is above the package. >>> >>> To reserve this name for x86, we can't have the well-defined topology >>> ordering. >>> >>> But topology ordering is necessary in generic code, and many >>> calculations depend on the topology ordering. >> >> could you point me to the code? > > Yes, e.g., there're 2 helpers: machine_topo_get_cores_per_socket() and > machine_topo_get_threads_per_socket(). I see. these two helpers are fragile, that they need to be updated every time new level between core and socket is introduced. Anyway, we can ensure the order for each ARCH, that the valid levels for any ARCH are ordered. e.g., we have @drawers, @books, @sockets, @dies, @clusters, @module, @cores, @threads defined, for s390, the valid levels are @drawers, @books, @sockets, @cores, @threads for arm, the valid levels are @sockets, @dies, @clusters, @cores, @threads (I'm not sure if die is supported for ARM?) for x86, the valid levels are @sockets, @dies, @module, @cores, @threads All of them are ordered. those unsupported level in each ARCH just get value 1. It won't have any issue in the calculation for the default value, but you provided two functions may not be lucky. anyway, they can be fixed at the time when we really go this approach. >> >>>> >>>>>> >>>>>> s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also >>>>>> add a module level for x86 instead of reusing cluster. >>>>>> >>>>>> (This is also what I want to reply to the cover letter.) >>>>>> >>>>>> [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ >>>>> >>>>> These two new levels have the clear topological hierarchy relationship >>>>> and don't duplicate existing ones. >>>>> >>>>> "book" or "drawer" may correspond to intel's "cluster". >>>>> >>>>> Maybe, in the future, we could support for arch-specific alias topologies >>>>> in -smp. >>>> >>>> I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any >>>> benefit except avoid adding a new field in SMPconfiguration. All the other >>>> cluster code is ARM specific and x86 cannot share. >>> >>> The point is that there is no difference between intel module and general >>> cluster...Considering only the naming issue, even AMD has the "complex" to >>> correspond to the Intel's "module". >> >> does complex of AMD really match with intel module? L3 cache is shared in >> one complex, while L2 cache is shared in one module for now. > > If then it could correspond to intel's tile, which is after all a level > below die. So if AMD wants to add complex in smp topology, where should complex level get put? between die and cluster? > Thanks, > Zhao >
On Wed, Jan 17, 2024 at 12:40:12AM +0800, Xiaoyao Li wrote: > Date: Wed, 17 Jan 2024 00:40:12 +0800 > From: Xiaoyao Li <xiaoyao.li@intel.com> > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > On 1/15/2024 11:18 PM, Zhao Liu wrote: > > Hi Xiaoyao, > > > > On Mon, Jan 15, 2024 at 03:45:58PM +0800, Xiaoyao Li wrote: > > > Date: Mon, 15 Jan 2024 15:45:58 +0800 > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > On 1/15/2024 1:59 PM, Zhao Liu wrote: > > > > (Also cc "machine core" maintainers.) > > > > u > > > > Hi Xiaoyao, > > > > > > > > On Mon, Jan 15, 2024 at 12:18:17PM +0800, Xiaoyao Li wrote: > > > > > Date: Mon, 15 Jan 2024 12:18:17 +0800 > > > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > > > > > On 1/15/2024 11:27 AM, Zhao Liu wrote: > > > > > > On Sun, Jan 14, 2024 at 09:49:18PM +0800, Xiaoyao Li wrote: > > > > > > > Date: Sun, 14 Jan 2024 21:49:18 +0800 > > > > > > > From: Xiaoyao Li <xiaoyao.li@intel.com> > > > > > > > Subject: Re: [PATCH v7 10/16] i386/cpu: Introduce cluster-id to X86CPU > > > > > > > > > > > > > > On 1/8/2024 4:27 PM, Zhao Liu wrote: > > > > > > > > From: Zhuocheng Ding <zhuocheng.ding@intel.com> > > > > > > > > > > > > > > > > Introduce cluster-id other than module-id to be consistent with > > > > > > > > CpuInstanceProperties.cluster-id, and this avoids the confusion > > > > > > > > of parameter names when hotplugging. > > > > > > > > > > > > > > I don't think reusing 'cluster' from arm for x86's 'module' is a good idea. > > > > > > > It introduces confusion around the code. > > > > > > > > > > > > There is a precedent: generic "socket" v.s. i386 "package". > > > > > > > > > > It's not the same thing. "socket" vs "package" is just software people and > > > > > hardware people chose different name. It's just different naming issue. > > > > > > > > No, it's a similar issue. Same physical device, different name only. > > > > > > > > Furthermore, the topology was introduced for resource layout and silicon > > > > fabrication, and similar design ideas and fabrication processes are fairly > > > > consistent across common current arches. Therefore, it is possible to > > > > abstract similar topological hierarchies for different arches. > > > > > > > > > > > > > > however, here it's reusing name issue while 'cluster' has been defined for > > > > > x86. It does introduce confusion. > > > > > > > > There's nothing fundamentally different between the x86 module and the > > > > generic cluster, is there? This is the reason that I don't agree with > > > > introducing "modules" in -smp. > > > > > > generic cluster just means the cluster of processors, i.e, a group of > > > cpus/lps. It is just a middle level between die and core. > > > > Not sure if you mean the "cluster" device for TCG GDB? "cluster" device > > is different with "cluster" option in -smp. > > No, I just mean the word 'cluster'. And I thought what you called "generic > cluster" means "a cluster of logical processors" > > Below I quote the description of Yanan's commit 864c3b5c32f0: > > A cluster generally means a group of CPU cores which share L2 cache > or other mid-level resources, and it is the shared resources that > is used to improve scheduler's behavior. From the point of view of > the size range, it's between CPU die and CPU core. For example, on > some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, > and 4 CPU cores in each cluster. The 4 CPU cores share a separate > L2 cache and a L3 cache tag, which brings cache affinity advantage. > > What I get from it, is, cluster is just a middle level between CPU die and > CPU core. Here the words "a group of CPU" is not the software concept, but a hardware topology. > The cpu cores inside one cluster shares some mid-level resource. > L2 cache is just one example of the shared mid-level resource. So it can be > either module level, or tile level in x86, or even the diegrp level you > mentioned below. In actual hardware design, ARM's cluster is close to intel's module. I'm not seeing any examples of clusters being similar to tile or diegrp. Even within intel, the hardware architects have agreed definitions for each level. > > > When Yanan introduced the "cluster" option in -smp, he mentioned that it > > is for sharing L2 and L3 tags, which roughly corresponds to our module. > > > > > > > > It can be the module level in intel, or tile level. Further, if per die lp > > > number increases in the future, there might be more middle levels in intel > > > between die and core. Then at that time, how to decide what level should > > > cluster be mapped to? > > > > Currently, there're 3 levels defined in SDM which are between die and > > core: diegrp, tile and module. In our products, L2 is just sharing on the > > module, so the intel's module and the general cluster are the best match. > > you said 'generic cluster' a lot of times. But from my point of view, you > are referring to current ARM's cluster instead of *generic* cluster. No, I'm always talking about the "cluster" in -smp, not ARM specific cluster. ARM just maps its arch-specific cluster to the general one. > > Anyway, cluster is just a mid-level between die and core. We should not > associate it any specific resource. A resource is shared in what level can > change, e.g., initially L3 cache is shared in a physical package. When > multi-die got supported, L3 cache is shared in one die. Now, on AMD product, > L3 cache is shared in one complex, and one die can have 2 complexs thus 2 > separate L3 cache in one die. > It doesn't matter calling it cluster, or module, or xyz. It is just a name > to represent a cpu topology level between die and core. In the case of more complex topologies, QEMU may consider supporting aliasing. Vender can support topology aliases for its own arch, but there is no possibility of discarding QEMU's unified topology hierarchy in favor of building arch-specific hierarchies. > What matters is, > once it gets accepted, it becomes formal ABI for users that 'cluster' means > 'module' for x86 users. This is definitely a big confusion for people. Maybe > people try to figure out why, and find the reason is that 'cluster' means > the level at which L2 cache is shared and that's just the module level in > x86 shares L2 cache. Maybe in the future, "L2 is shared in module" get > changed just like the example I give for L3 above. My decision to map modules to clusters is based on existing and future product topology characteristics, which are supported by hardware practices. Of course, in the upcoming our cache topology patch series, users can customize the cache topology hierarchy. > Then, that's really the big confusion, I don't think this will confuse the user. All details can be explained clearly by document. > and all this become the "historic reason" that cluster is > chosen to represent module in x86. > > > There are no commercially available machines for the other levels yet, > > so there's no way to ensure exactly what the future holds, but we should > > try to avoid fragmentation of the topology hierarchy and try to maintain > > the uniform and common topology hierarchies for QEMU. > > > > Unless a new level for -smp is introduced in the future when an unsolvable > > problem is raised. > > > > > > > > > > > > > > > > The direct definition of cluster is the level that is above the "core" > > > > > > and shares the hardware resources including L2. In this sense, arm's > > > > > > cluster is the same as x86's module. > > > > > > > > > > then, what about intel implements tile level in the future? why ARM's > > > > > 'cluster' is mapped to 'module', but not 'tile' ? > > > > > > > > This depends on the actual need. > > > > > > > > Module (for x86) and cluster (in general) are similar, and tile (for x86) > > > > is used for L3 in practice, so I use module rather than tile to map > > > > generic cluster. > > > > > > > > And, it should be noted that x86 module is mapped to the generic cluster, > > > > not to ARM's. It's just that currently only ARM is using the clusters > > > > option in -smp. > > > > > > > > I believe QEMU provides the abstract and unified topology hierarchies in > > > > -smp, not the arch-specific hierarchies. > > > > > > > > > > > > > > reusing 'cluster' for 'module' is just a bad idea. > > > > > > > > > > > Though different arches have different naming styles, but QEMU's generic > > > > > > code still need the uniform topology hierarchy. > > > > > > > > > > generic code can provide as many topology levels as it can. each ARCH can > > > > > choose to use the ones it supports. > > > > > > > > > > e.g., > > > > > > > > > > in qapi/machine.json, it says, > > > > > > > > > > # The ordering from highest/coarsest to lowest/finest is: > > > > > # @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. > > > > > > > > This ordering is well-defined... > > > > > > > > > # > > > > > # Different architectures support different subsets of topology > > > > > # containers. > > > > > # > > > > > # For example, s390x does not have clusters and dies, and the socket > > > > > # is the parent container of cores. > > > > > > > > > > we can update it to > > > > > > > > > > # The ordering from highest/coarsest to lowest/finest is: > > > > > # @drawers, @books, @sockets, @dies, @clusters, @module, @cores, > > > > > # @threads. > > > > > > > > ...but here it's impossible to figure out why cluster is above module, > > > > and even I can't come up with the difference between cluster and module. > > > > > > > > > # > > > > > # Different architectures support different subsets of topology > > > > > # containers. > > > > > # > > > > > # For example, s390x does not have clusters and dies, and the socket > > > > > # is the parent container of cores. > > > > > # > > > > > # For example, x86 does not have drawers and books, and does not support > > > > > # cluster. > > > > > > > > > > even if cluster of x86 is supported someday in the future, we can remove the > > > > > ordering requirement from above description. > > > > > > > > x86's cluster is above the package. > > > > > > > > To reserve this name for x86, we can't have the well-defined topology > > > > ordering. > > > > > > > > But topology ordering is necessary in generic code, and many > > > > calculations depend on the topology ordering. > > > > > > could you point me to the code? > > > > Yes, e.g., there're 2 helpers: machine_topo_get_cores_per_socket() and > > machine_topo_get_threads_per_socket(). > > I see. these two helpers are fragile, that they need to be updated every > time new level between core and socket is introduced. > > Anyway, we can ensure the order for each ARCH, that the valid levels for any > ARCH are ordered. e.g., we have > > @drawers, @books, @sockets, @dies, @clusters, @module, @cores, @threads Sorry to repeat my previous objection: the order why cluster is above the module can't be well-defined. This is more confusing. It is not possible to add a new level without clearly defining the hierarchical relationship. Different venders have their different names, there is no reason or possibility to cram all the names of all the vender into this arrangement, and it is not maintainable. > > defined, > > for s390, the valid levels are > > @drawers, @books, @sockets, @cores, @threads > > for arm, the valid levels are > > @sockets, @dies, @clusters, @cores, @threads > (I'm not sure if die is supported for ARM?) > > for x86, the valid levels are > > @sockets, @dies, @module, @cores, @threads > > All of them are ordered. those unsupported level in each ARCH just get value > 1. It won't have any issue in the calculation for the default value, but you > provided two functions may not be lucky. anyway, they can be fixed at the > time when we really go this approach. > > > > > > > > > > > > > > > > > > > > > > > s390 just added 'drawer' and 'book' in cpu topology[1]. I think we can also > > > > > > > add a module level for x86 instead of reusing cluster. > > > > > > > > > > > > > > (This is also what I want to reply to the cover letter.) > > > > > > > > > > > > > > [1] https://lore.kernel.org/qemu-devel/20231016183925.2384704-1-nsg@linux.ibm.com/ > > > > > > > > > > > > These two new levels have the clear topological hierarchy relationship > > > > > > and don't duplicate existing ones. > > > > > > > > > > > > "book" or "drawer" may correspond to intel's "cluster". > > > > > > > > > > > > Maybe, in the future, we could support for arch-specific alias topologies > > > > > > in -smp. > > > > > > > > > > I don't think we need alias, reusing 'cluster' for 'module' doesn't gain any > > > > > benefit except avoid adding a new field in SMPconfiguration. All the other > > > > > cluster code is ARM specific and x86 cannot share. > > > > > > > > The point is that there is no difference between intel module and general > > > > cluster...Considering only the naming issue, even AMD has the "complex" to > > > > correspond to the Intel's "module". > > > > > > does complex of AMD really match with intel module? L3 cache is shared in > > > one complex, while L2 cache is shared in one module for now. > > > > If then it could correspond to intel's tile, which is after all a level > > below die. > > So if AMD wants to add complex in smp topology, where should complex level > get put? between die and cluster? That's just an example, and just showed that AMD and intel have naming differences, even if they are both x86. We can't be happy with everyone. Thanks, Zhao
Hi Xiaoyao, > > > > generic cluster just means the cluster of processors, i.e, a group of > > > > cpus/lps. It is just a middle level between die and core. > > > > > > Not sure if you mean the "cluster" device for TCG GDB? "cluster" device > > > is different with "cluster" option in -smp. > > > > No, I just mean the word 'cluster'. And I thought what you called "generic > > cluster" means "a cluster of logical processors" > > > > Below I quote the description of Yanan's commit 864c3b5c32f0: > > > > A cluster generally means a group of CPU cores which share L2 cache > > or other mid-level resources, and it is the shared resources that > > is used to improve scheduler's behavior. From the point of view of > > the size range, it's between CPU die and CPU core. For example, on > > some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node, > > and 4 CPU cores in each cluster. The 4 CPU cores share a separate > > L2 cache and a L3 cache tag, which brings cache affinity advantage. > > > > What I get from it, is, cluster is just a middle level between CPU die and > > CPU core. > > Here the words "a group of CPU" is not the software concept, but a hardware > topology. When I found this material: https://www.kernel.org/doc/Documentation/devicetree/bindings/cpu/cpu-topology.txt I realized the most essential difference between cluster and module is that cluster supports nesting, i.e. it can have nesting clusters as a layer of CPU topology. Even though QEMU's description of cluster looked similar to module when it was introduced, it is impossible to envision whether ARM/RISCV and other device tree-based arches will continue to introduce nesting clusters in the future. To avoid potential conflicts, it would be better to introduce modules for x86 to differentiate them from clusters. Thanks, Zhao
diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 5269aae3a5c2..1c1d368614ee 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -329,6 +329,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, cpu->die_id = 0; } + /* + * cluster-id was optional in QEMU 9.0 and older, so keep it optional + * if there's only one cluster per die. + */ + if (cpu->cluster_id < 0 && ms->smp.clusters == 1) { + cpu->cluster_id = 0; + } + if (cpu->socket_id < 0) { error_setg(errp, "CPU socket-id is not set"); return; @@ -345,6 +353,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, cpu->die_id, ms->smp.dies - 1); return; } + if (cpu->cluster_id < 0) { + error_setg(errp, "CPU cluster-id is not set"); + return; + } else if (cpu->cluster_id > ms->smp.clusters - 1) { + error_setg(errp, "Invalid CPU cluster-id: %u must be in range 0:%u", + cpu->cluster_id, ms->smp.clusters - 1); + return; + } if (cpu->core_id < 0) { error_setg(errp, "CPU core-id is not set"); return; @@ -364,16 +380,9 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, topo_ids.pkg_id = cpu->socket_id; topo_ids.die_id = cpu->die_id; + topo_ids.module_id = cpu->cluster_id; topo_ids.core_id = cpu->core_id; topo_ids.smt_id = cpu->thread_id; - - /* - * TODO: This is the temporary initialization for topo_ids.module_id to - * avoid "maybe-uninitialized" compilation errors. Will remove when - * X86CPU supports cluster_id. - */ - topo_ids.module_id = 0; - cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); } @@ -418,6 +427,14 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, } cpu->die_id = topo_ids.die_id; + if (cpu->cluster_id != -1 && cpu->cluster_id != topo_ids.module_id) { + error_setg(errp, "property cluster-id: %u doesn't match set apic-id:" + " 0x%x (cluster-id: %u)", cpu->cluster_id, cpu->apic_id, + topo_ids.module_id); + return; + } + cpu->cluster_id = topo_ids.module_id; + if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { error_setg(errp, "property core-id: %u doesn't match set apic-id:" " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, diff --git a/target/i386/cpu.c b/target/i386/cpu.c index a2d39d2198b6..498a4be62b40 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7909,12 +7909,14 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, 0), DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), #else DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), + DEFINE_PROP_INT32("cluster-id", X86CPU, cluster_id, -1), DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 97b290e10576..009950b87203 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2057,6 +2057,7 @@ struct ArchCPU { int32_t node_id; /* NUMA node this CPU belongs to */ int32_t socket_id; int32_t die_id; + int32_t cluster_id; int32_t core_id; int32_t thread_id;