Message ID | 20220403145953.10522-5-gshan@redhat.com (mailing list archive)
---|---
State | New, archived
Series | hw/arm/virt: Fix CPU's default NUMA node ID
On Sun, 3 Apr 2022 22:59:53 +0800
Gavin Shan <gshan@redhat.com> wrote:

> When the PPTT table is built, the CPU topology is re-calculated, but
> it's unecessary because the CPU topology has been populated in
> virt_possible_cpu_arch_ids() on arm/virt machine.
>
> This reworks build_pptt() to avoid by reusing the existing one in
> ms->possible_cpus. Currently, the only user of build_pptt() is
> arm/virt machine.
>
> Signed-off-by: Gavin Shan <gshan@redhat.com>

Hi Gavin,

My compiler isn't being very smart today and gives a bunch of
maybe-used-uninitialized warnings for socket_offset, cluster_offset
and core_offset. They probably are initialized in all real paths, but
I think you need to set them to something at instantiation to keep
the compiler happy.

Thanks,

Jonathan

> ---
>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>  1 file changed, 38 insertions(+), 62 deletions(-)
>
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 4086879ebf..4b0f9df3e3 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>                  const char *oem_id, const char *oem_table_id)
>  {
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    GQueue *list = g_queue_new();
> -    guint pptt_start = table_data->len;
> -    guint parent_offset;
> -    guint length, i;
> -    int uid = 0;
> -    int socket;
> +    CPUArchIdList *cpus = ms->possible_cpus;
> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> +    uint32_t socket_offset, cluster_offset, core_offset;
> +    uint32_t pptt_start = table_data->len;
> +    int n;
>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>
>      acpi_table_begin(&table, table_data);
>
> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> -        g_queue_push_tail(list,
> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> -        build_processor_hierarchy_node(
> -            table_data,
> -            /*
> -             * Physical package - represents the boundary
> -             * of a physical package
> -             */
> -            (1 << 0),
> -            0, socket, NULL, 0);
> -    }
> +    for (n = 0; n < cpus->len; n++) {
> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> +            socket_id = cpus->cpus[n].props.socket_id;
> +            cluster_id = -1;
> +            core_id = -1;
> +            socket_offset = table_data->len - pptt_start;
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 0), /* Physical package */
> +                0, socket_id, NULL, 0);
> +        }
>
> -    if (mc->smp_props.clusters_supported) {
> -        length = g_queue_get_length(list);
> -        for (i = 0; i < length; i++) {
> -            int cluster;
> -
> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (0 << 0), /* not a physical package */
> -                    parent_offset, cluster, NULL, 0);
> +        if (mc->smp_props.clusters_supported) {
> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
> +                cluster_id = cpus->cpus[n].props.cluster_id;
> +                core_id = -1;
> +                cluster_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
> +                    (0 << 0), /* Not a physical package */
> +                    socket_offset, cluster_id, NULL, 0);
>              }
> +        } else {
> +            cluster_offset = socket_offset;
>          }
> -    }
>
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int core;
> -
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (core = 0; core < ms->smp.cores; core++) {
> -            if (ms->smp.threads > 1) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> +        if (ms->smp.threads <= 1) {
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 1) | /* ACPI Processor ID valid */
> +                (1 << 3), /* Node is a Leaf */
> +                cluster_offset, n, NULL, 0);
> +        } else {
> +            if (cpus->cpus[n].props.core_id != core_id) {
> +                core_id = cpus->cpus[n].props.core_id;
> +                core_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
>                      (0 << 0), /* not a physical package */
> -                    parent_offset, core, NULL, 0);
> -            } else {
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (1 << 1) | /* ACPI Processor ID valid */
> -                    (1 << 3), /* Node is a Leaf */
> -                    parent_offset, uid++, NULL, 0);
> +                    cluster_offset, core_id, NULL, 0);
>              }
> -        }
> -    }
> -
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int thread;
>
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (thread = 0; thread < ms->smp.threads; thread++) {
> -            build_processor_hierarchy_node(
> -                table_data,
> +            build_processor_hierarchy_node(table_data,
>                  (1 << 1) | /* ACPI Processor ID valid */
>                  (1 << 2) | /* Processor is a Thread */
>                  (1 << 3), /* Node is a Leaf */
> -                parent_offset, uid++, NULL, 0);
> +                core_offset, n, NULL, 0);
>          }
>      }
>
> -    g_queue_free(list);
>      acpi_table_end(linker, &table);
>  }
>
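[As a standalone reduction of the pattern Jonathan describes — all names below (walk_ids, offset, last_id) are illustrative stand-ins, not the QEMU code: the offset is only assigned inside a data-dependent branch, and the compiler cannot prove the branch fires on the first iteration, so -Wmaybe-uninitialized triggers unless the variable is initialized at declaration.]

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void walk_ids(const int64_t *ids, int len)
{
    int64_t last_id = -1;
    uint32_t offset = 0;    /* the suggested fix: initialize at declaration */

    for (int n = 0; n < len; n++) {
        if (ids[n] != last_id) {
            /* always taken when n == 0, since real IDs are >= 0,
             * but that invariant is invisible to the compiler */
            last_id = ids[n];
            offset = (uint32_t)n;
        }
        printf("cpu %d -> parent offset %" PRIu32 "\n", n, offset);
    }
}

int main(void)
{
    const int64_t socket_ids[] = { 0, 0, 1, 1 };

    walk_ids(socket_ids, 4);
    return 0;
}
```

[In build_pptt() the same reasoning applies: socket_id starts at -1 while real socket IDs are non-negative, so socket_offset is assigned on the first iteration in every real path.]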
Hi Jonathan, On 4/12/22 11:40 PM, Jonathan Cameron wrote: > On Sun, 3 Apr 2022 22:59:53 +0800 > Gavin Shan <gshan@redhat.com> wrote: >> When the PPTT table is built, the CPU topology is re-calculated, but >> it's unecessary because the CPU topology has been populated in >> virt_possible_cpu_arch_ids() on arm/virt machine. >> >> This reworks build_pptt() to avoid by reusing the existing one in >> ms->possible_cpus. Currently, the only user of build_pptt() is >> arm/virt machine. >> >> Signed-off-by: Gavin Shan <gshan@redhat.com> > > My compiler isn't being very smart today and gives a bunch of > maybe used uninitialized for socket_offset, cluster_offset and core_offset. > > They probably are initialized in all real paths, but I think you need to > set them to something at instantiation to keep the compiler happy. > Thanks for reporting the warning raised from the compiler. I think your compiler may be smarter than mine :) Yeah, they're initialized in all real paths after checking the code again. So lets initialize them with zeroes in v6, but I would hold for Igor and Yanan's reviews on this (v5) series. Thanks, Gavin > >> --- >> hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- >> 1 file changed, 38 insertions(+), 62 deletions(-) >> >> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >> index 4086879ebf..4b0f9df3e3 100644 >> --- a/hw/acpi/aml-build.c >> +++ b/hw/acpi/aml-build.c >> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, >> const char *oem_id, const char *oem_table_id) >> { >> MachineClass *mc = MACHINE_GET_CLASS(ms); >> - GQueue *list = g_queue_new(); >> - guint pptt_start = table_data->len; >> - guint parent_offset; >> - guint length, i; >> - int uid = 0; >> - int socket; >> + CPUArchIdList *cpus = ms->possible_cpus; >> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; >> + uint32_t socket_offset, cluster_offset, core_offset; >> + uint32_t pptt_start = table_data->len; >> + int n; >> AcpiTable table = { .sig = "PPTT", .rev = 2, >> .oem_id = oem_id, .oem_table_id = oem_table_id }; >> >> acpi_table_begin(&table, table_data); >> >> - for (socket = 0; socket < ms->smp.sockets; socket++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - /* >> - * Physical package - represents the boundary >> - * of a physical package >> - */ >> - (1 << 0), >> - 0, socket, NULL, 0); >> - } >> + for (n = 0; n < cpus->len; n++) { >> + if (cpus->cpus[n].props.socket_id != socket_id) { >> + socket_id = cpus->cpus[n].props.socket_id; >> + cluster_id = -1; >> + core_id = -1; >> + socket_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (1 << 0), /* Physical package */ >> + 0, socket_id, NULL, 0); >> + } >> >> - if (mc->smp_props.clusters_supported) { >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int cluster; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - (0 << 0), /* not a physical package */ >> - parent_offset, cluster, NULL, 0); >> + if (mc->smp_props.clusters_supported) { >> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >> + cluster_id = cpus->cpus[n].props.cluster_id; >> + core_id = -1; >> + cluster_offset = table_data->len 
- pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (0 << 0), /* Not a physical package */ >> + socket_offset, cluster_id, NULL, 0); >> } >> + } else { >> + cluster_offset = socket_offset; >> } >> - } >> >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int core; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (core = 0; core < ms->smp.cores; core++) { >> - if (ms->smp.threads > 1) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> + if (ms->smp.threads <= 1) { >> + build_processor_hierarchy_node(table_data, >> + (1 << 1) | /* ACPI Processor ID valid */ >> + (1 << 3), /* Node is a Leaf */ >> + cluster_offset, n, NULL, 0); >> + } else { >> + if (cpus->cpus[n].props.core_id != core_id) { >> + core_id = cpus->cpus[n].props.core_id; >> + core_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> (0 << 0), /* not a physical package */ >> - parent_offset, core, NULL, 0); >> - } else { >> - build_processor_hierarchy_node( >> - table_data, >> - (1 << 1) | /* ACPI Processor ID valid */ >> - (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + cluster_offset, core_id, NULL, 0); >> } >> - } >> - } >> - >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int thread; >> >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (thread = 0; thread < ms->smp.threads; thread++) { >> - build_processor_hierarchy_node( >> - table_data, >> + build_processor_hierarchy_node(table_data, >> (1 << 1) | /* ACPI Processor ID valid */ >> (1 << 2) | /* Processor is a Thread */ >> (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + core_offset, n, NULL, 0); >> } >> } >> >> - g_queue_free(list); >> acpi_table_end(linker, &table); >> } >> >
On Sun, 3 Apr 2022 22:59:53 +0800 Gavin Shan <gshan@redhat.com> wrote: > When the PPTT table is built, the CPU topology is re-calculated, but > it's unecessary because the CPU topology has been populated in > virt_possible_cpu_arch_ids() on arm/virt machine. > > This reworks build_pptt() to avoid by reusing the existing one in > ms->possible_cpus. Currently, the only user of build_pptt() is > arm/virt machine. > > Signed-off-by: Gavin Shan <gshan@redhat.com> > --- > hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- > 1 file changed, 38 insertions(+), 62 deletions(-) > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > index 4086879ebf..4b0f9df3e3 100644 > --- a/hw/acpi/aml-build.c > +++ b/hw/acpi/aml-build.c > @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, > const char *oem_id, const char *oem_table_id) > { > MachineClass *mc = MACHINE_GET_CLASS(ms); > - GQueue *list = g_queue_new(); > - guint pptt_start = table_data->len; > - guint parent_offset; > - guint length, i; > - int uid = 0; > - int socket; > + CPUArchIdList *cpus = ms->possible_cpus; > + int64_t socket_id = -1, cluster_id = -1, core_id = -1; > + uint32_t socket_offset, cluster_offset, core_offset; > + uint32_t pptt_start = table_data->len; > + int n; > AcpiTable table = { .sig = "PPTT", .rev = 2, > .oem_id = oem_id, .oem_table_id = oem_table_id }; > > acpi_table_begin(&table, table_data); > > - for (socket = 0; socket < ms->smp.sockets; socket++) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > - /* > - * Physical package - represents the boundary > - * of a physical package > - */ > - (1 << 0), > - 0, socket, NULL, 0); > - } > + for (n = 0; n < cpus->len; n++) { > + if (cpus->cpus[n].props.socket_id != socket_id) { > + socket_id = cpus->cpus[n].props.socket_id; this relies on cpus->cpus[n].props.*_id being sorted form top to down levels I'd add here and for other container_id an assert() that checks for that specific ID goes in only one direction, to be able to detect when rule is broken. otherwise on may end up with duplicate containers silently. 
> + cluster_id = -1; > + core_id = -1; > + socket_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > + (1 << 0), /* Physical package */ > + 0, socket_id, NULL, 0); > + } > > - if (mc->smp_props.clusters_supported) { > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int cluster; > - > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > - (0 << 0), /* not a physical package */ > - parent_offset, cluster, NULL, 0); > + if (mc->smp_props.clusters_supported) { > + if (cpus->cpus[n].props.cluster_id != cluster_id) { > + cluster_id = cpus->cpus[n].props.cluster_id; > + core_id = -1; > + cluster_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > + (0 << 0), /* Not a physical package */ > + socket_offset, cluster_id, NULL, 0); > } > + } else { > + cluster_offset = socket_offset; > } > - } > > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int core; > - > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (core = 0; core < ms->smp.cores; core++) { > - if (ms->smp.threads > 1) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > + if (ms->smp.threads <= 1) { why <= instead of < is used here? > + build_processor_hierarchy_node(table_data, > + (1 << 1) | /* ACPI Processor ID valid */ > + (1 << 3), /* Node is a Leaf */ > + cluster_offset, n, NULL, 0); > + } else { > + if (cpus->cpus[n].props.core_id != core_id) { > + core_id = cpus->cpus[n].props.core_id; > + core_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > (0 << 0), /* not a physical package */ > - parent_offset, core, NULL, 0); > - } else { > - build_processor_hierarchy_node( > - table_data, > - (1 << 1) | /* ACPI Processor ID valid */ > - (1 << 3), /* Node is a Leaf */ > - parent_offset, uid++, NULL, 0); > + cluster_offset, core_id, NULL, 0); > } > - } > - } > - > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int thread; > > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (thread = 0; thread < ms->smp.threads; thread++) { > - build_processor_hierarchy_node( > - table_data, > + build_processor_hierarchy_node(table_data, > (1 << 1) | /* ACPI Processor ID valid */ > (1 << 2) | /* Processor is a Thread */ > (1 << 3), /* Node is a Leaf */ > - parent_offset, uid++, NULL, 0); > + core_offset, n, NULL, 0); > } > } > > - g_queue_free(list); > acpi_table_end(linker, &table); > } >
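[A compact sketch of the direction check Igor suggests, under the assumption that each level's ID may only move forward as possible_cpus is walked; check_sorted_one_level and its test data are hypothetical, only the assert pattern is the point.]

```c
#include <assert.h>
#include <stdint.h>

static void check_sorted_one_level(const int64_t *ids, int len)
{
    int64_t last_id = -1;

    for (int n = 0; n < len; n++) {
        /* an ID moving backwards means the list is not sorted
         * top-down, so a container would be emitted twice */
        assert(ids[n] >= last_id);
        last_id = ids[n];
    }
}

int main(void)
{
    const int64_t sorted[] = { 0, 0, 1, 1 };       /* passes */
    /* const int64_t unsorted[] = { 0, 1, 0, 1 };     would abort */

    check_sorted_one_level(sorted, 4);
    return 0;
}
```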
Hi Igor, On 4/13/22 9:52 PM, Igor Mammedov wrote: > On Sun, 3 Apr 2022 22:59:53 +0800 > Gavin Shan <gshan@redhat.com> wrote: > >> When the PPTT table is built, the CPU topology is re-calculated, but >> it's unecessary because the CPU topology has been populated in >> virt_possible_cpu_arch_ids() on arm/virt machine. >> >> This reworks build_pptt() to avoid by reusing the existing one in >> ms->possible_cpus. Currently, the only user of build_pptt() is >> arm/virt machine. >> >> Signed-off-by: Gavin Shan <gshan@redhat.com> >> --- >> hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- >> 1 file changed, 38 insertions(+), 62 deletions(-) >> >> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >> index 4086879ebf..4b0f9df3e3 100644 >> --- a/hw/acpi/aml-build.c >> +++ b/hw/acpi/aml-build.c >> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, >> const char *oem_id, const char *oem_table_id) >> { >> MachineClass *mc = MACHINE_GET_CLASS(ms); >> - GQueue *list = g_queue_new(); >> - guint pptt_start = table_data->len; >> - guint parent_offset; >> - guint length, i; >> - int uid = 0; >> - int socket; >> + CPUArchIdList *cpus = ms->possible_cpus; >> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; >> + uint32_t socket_offset, cluster_offset, core_offset; >> + uint32_t pptt_start = table_data->len; >> + int n; >> AcpiTable table = { .sig = "PPTT", .rev = 2, >> .oem_id = oem_id, .oem_table_id = oem_table_id }; >> >> acpi_table_begin(&table, table_data); >> >> - for (socket = 0; socket < ms->smp.sockets; socket++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - /* >> - * Physical package - represents the boundary >> - * of a physical package >> - */ >> - (1 << 0), >> - 0, socket, NULL, 0); >> - } >> + for (n = 0; n < cpus->len; n++) { > >> + if (cpus->cpus[n].props.socket_id != socket_id) { >> + socket_id = cpus->cpus[n].props.socket_id; > > this relies on cpus->cpus[n].props.*_id being sorted form top to down levels > I'd add here and for other container_id an assert() that checks for that > specific ID goes in only one direction, to be able to detect when rule is broken. > > otherwise on may end up with duplicate containers silently. > Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids(). The only user of build_pptt() is arm/virt machine. So it's fine. However, I think I may need add comments for this in v6. /* * This works with the assumption that cpus[n].props.*_id has been * sorted from top to down levels in mc->possible_cpu_arch_ids(). * Otherwise, the unexpected and duplicate containers will be created. */ The implementation in v3 looks complicated, but comprehensive. 
The one in this revision (v6) looks simple, but the we're losing flexibility :) >> + cluster_id = -1; >> + core_id = -1; >> + socket_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (1 << 0), /* Physical package */ >> + 0, socket_id, NULL, 0); >> + } >> >> - if (mc->smp_props.clusters_supported) { >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int cluster; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - (0 << 0), /* not a physical package */ >> - parent_offset, cluster, NULL, 0); >> + if (mc->smp_props.clusters_supported) { >> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >> + cluster_id = cpus->cpus[n].props.cluster_id; >> + core_id = -1; >> + cluster_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (0 << 0), /* Not a physical package */ >> + socket_offset, cluster_id, NULL, 0); >> } >> + } else { >> + cluster_offset = socket_offset; >> } >> - } >> >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int core; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (core = 0; core < ms->smp.cores; core++) { >> - if (ms->smp.threads > 1) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> + if (ms->smp.threads <= 1) { > > why <= instead of < is used here? > It's the counterpart to the one in the original implementation, which is "if (ms->smp.threads > 1)". the nodes for threads won't be created until ms->smp.threads is bigger than 1. If we want to use "<" here, then the code will be changed like below in v6. However, I really don't think it's necessary. if (ms->smp.threads < 2) { : } >> + build_processor_hierarchy_node(table_data, >> + (1 << 1) | /* ACPI Processor ID valid */ >> + (1 << 3), /* Node is a Leaf */ >> + cluster_offset, n, NULL, 0); >> + } else { >> + if (cpus->cpus[n].props.core_id != core_id) { >> + core_id = cpus->cpus[n].props.core_id; >> + core_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> (0 << 0), /* not a physical package */ >> - parent_offset, core, NULL, 0); >> - } else { >> - build_processor_hierarchy_node( >> - table_data, >> - (1 << 1) | /* ACPI Processor ID valid */ >> - (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + cluster_offset, core_id, NULL, 0); >> } >> - } >> - } >> - >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int thread; >> >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (thread = 0; thread < ms->smp.threads; thread++) { >> - build_processor_hierarchy_node( >> - table_data, >> + build_processor_hierarchy_node(table_data, >> (1 << 1) | /* ACPI Processor ID valid */ >> (1 << 2) | /* Processor is a Thread */ >> (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + core_offset, n, NULL, 0); >> } >> } >> >> - g_queue_free(list); >> acpi_table_end(linker, &table); >> } >> Thanks, Gavin
On 2022/4/14 8:33, Gavin Shan wrote: > Hi Igor, > > On 4/13/22 9:52 PM, Igor Mammedov wrote: >> On Sun, 3 Apr 2022 22:59:53 +0800 >> Gavin Shan <gshan@redhat.com> wrote: >> >>> When the PPTT table is built, the CPU topology is re-calculated, but >>> it's unecessary because the CPU topology has been populated in >>> virt_possible_cpu_arch_ids() on arm/virt machine. >>> >>> This reworks build_pptt() to avoid by reusing the existing one in >>> ms->possible_cpus. Currently, the only user of build_pptt() is >>> arm/virt machine. >>> >>> Signed-off-by: Gavin Shan <gshan@redhat.com> >>> --- >>> hw/acpi/aml-build.c | 100 >>> +++++++++++++++++--------------------------- >>> 1 file changed, 38 insertions(+), 62 deletions(-) >>> >>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >>> index 4086879ebf..4b0f9df3e3 100644 >>> --- a/hw/acpi/aml-build.c >>> +++ b/hw/acpi/aml-build.c >>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, >>> BIOSLinker *linker, MachineState *ms, >>> const char *oem_id, const char *oem_table_id) >>> { >>> MachineClass *mc = MACHINE_GET_CLASS(ms); >>> - GQueue *list = g_queue_new(); >>> - guint pptt_start = table_data->len; >>> - guint parent_offset; >>> - guint length, i; >>> - int uid = 0; >>> - int socket; >>> + CPUArchIdList *cpus = ms->possible_cpus; >>> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; >>> + uint32_t socket_offset, cluster_offset, core_offset; >>> + uint32_t pptt_start = table_data->len; >>> + int n; >>> AcpiTable table = { .sig = "PPTT", .rev = 2, >>> .oem_id = oem_id, .oem_table_id = >>> oem_table_id }; >>> acpi_table_begin(&table, table_data); >>> - for (socket = 0; socket < ms->smp.sockets; socket++) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> - /* >>> - * Physical package - represents the boundary >>> - * of a physical package >>> - */ >>> - (1 << 0), >>> - 0, socket, NULL, 0); >>> - } >>> + for (n = 0; n < cpus->len; n++) { >> >>> + if (cpus->cpus[n].props.socket_id != socket_id) { >>> + socket_id = cpus->cpus[n].props.socket_id; >> >> this relies on cpus->cpus[n].props.*_id being sorted form top to down >> levels >> I'd add here and for other container_id an assert() that checks for that >> specific ID goes in only one direction, to be able to detect when >> rule is broken. >> >> otherwise on may end up with duplicate containers silently. >> > > Exactly. cpus->cpus[n].props.*_id is sorted as you said in > virt_possible_cpu_arch_ids(). > The only user of build_pptt() is arm/virt machine. So it's fine. > However, I think I > may need add comments for this in v6. > > /* > * This works with the assumption that cpus[n].props.*_id has been > * sorted from top to down levels in mc->possible_cpu_arch_ids(). > * Otherwise, the unexpected and duplicate containers will be > created. > */ > > The implementation in v3 looks complicated, but comprehensive. 
The one > in this revision (v6) looks simple, but the we're losing flexibility :) > > >>> + cluster_id = -1; >>> + core_id = -1; >>> + socket_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> + (1 << 0), /* Physical package */ >>> + 0, socket_id, NULL, 0); >>> + } >>> - if (mc->smp_props.clusters_supported) { >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int cluster; >>> - >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> - (0 << 0), /* not a physical package */ >>> - parent_offset, cluster, NULL, 0); >>> + if (mc->smp_props.clusters_supported) { >>> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >>> + cluster_id = cpus->cpus[n].props.cluster_id; >>> + core_id = -1; >>> + cluster_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> + (0 << 0), /* Not a physical package */ >>> + socket_offset, cluster_id, NULL, 0); >>> } >>> + } else { >>> + cluster_offset = socket_offset; >>> } >>> - } >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int core; >>> - >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (core = 0; core < ms->smp.cores; core++) { >>> - if (ms->smp.threads > 1) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> + if (ms->smp.threads <= 1) { >> >> why <= instead of < is used here? >> > > It's the counterpart to the one in the original implementation, > which is "if (ms->smp.threads > 1)". the nodes for threads won't > be created until ms->smp.threads is bigger than 1. If we want > to use "<" here, then the code will be changed like below in v6. > However, I really don't think it's necessary. > > if (ms->smp.threads < 2) { > : > } > The smp_parse() guarantees that we either have "one threads" or "multi threads" in ms->smp. So I think there are two proper choices: if (ms->smp.threads == 1) { ... } else { ... } or if (ms->smp.threads > 1) { ... } else { ... 
} Thanks, Yanan > >>> + build_processor_hierarchy_node(table_data, >>> + (1 << 1) | /* ACPI Processor ID valid */ >>> + (1 << 3), /* Node is a Leaf */ >>> + cluster_offset, n, NULL, 0); >>> + } else { >>> + if (cpus->cpus[n].props.core_id != core_id) { >>> + core_id = cpus->cpus[n].props.core_id; >>> + core_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> (0 << 0), /* not a physical package */ >>> - parent_offset, core, NULL, 0); >>> - } else { >>> - build_processor_hierarchy_node( >>> - table_data, >>> - (1 << 1) | /* ACPI Processor ID valid */ >>> - (1 << 3), /* Node is a Leaf */ >>> - parent_offset, uid++, NULL, 0); >>> + cluster_offset, core_id, NULL, 0); >>> } >>> - } >>> - } >>> - >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int thread; >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (thread = 0; thread < ms->smp.threads; thread++) { >>> - build_processor_hierarchy_node( >>> - table_data, >>> + build_processor_hierarchy_node(table_data, >>> (1 << 1) | /* ACPI Processor ID valid */ >>> (1 << 2) | /* Processor is a Thread */ >>> (1 << 3), /* Node is a Leaf */ >>> - parent_offset, uid++, NULL, 0); >>> + core_offset, n, NULL, 0); >>> } >>> } >>> - g_queue_free(list); >>> acpi_table_end(linker, &table); >>> } > > Thanks, > Gavin > > .
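[A hedged sketch of the equivalence Yanan points out; core_is_leaf is a made-up helper, while the guarantee itself (threads >= 1) comes from smp_parse().]

```c
#include <assert.h>
#include <stdbool.h>

/* smp_parse() guarantees threads >= 1, so "== 1" and "> 1" together
 * cover every legal value; "<= 1" also works but reads as if a thread
 * count of 0 were possible. */
static bool core_is_leaf(unsigned int threads)
{
    return threads == 1;
}

int main(void)
{
    assert(core_is_leaf(1));    /* the core node itself is the leaf */
    assert(!core_is_leaf(2));   /* per-thread leaf nodes are emitted */
    return 0;
}
```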
Hi Gavin, On 2022/4/3 22:59, Gavin Shan wrote: > When the PPTT table is built, the CPU topology is re-calculated, but > it's unecessary because the CPU topology has been populated in > virt_possible_cpu_arch_ids() on arm/virt machine. > > This reworks build_pptt() to avoid by reusing the existing one in > ms->possible_cpus. Currently, the only user of build_pptt() is > arm/virt machine. > > Signed-off-by: Gavin Shan <gshan@redhat.com> > --- > hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- > 1 file changed, 38 insertions(+), 62 deletions(-) > > diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > index 4086879ebf..4b0f9df3e3 100644 > --- a/hw/acpi/aml-build.c > +++ b/hw/acpi/aml-build.c > @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, > const char *oem_id, const char *oem_table_id) > { > MachineClass *mc = MACHINE_GET_CLASS(ms); > - GQueue *list = g_queue_new(); > - guint pptt_start = table_data->len; > - guint parent_offset; > - guint length, i; > - int uid = 0; > - int socket; > + CPUArchIdList *cpus = ms->possible_cpus; > + int64_t socket_id = -1, cluster_id = -1, core_id = -1; nit: why not using "int" for the ID variables? (to be consistent with how the IDs are stored in CpuInstanceProperties). Thanks, Yanan > + uint32_t socket_offset, cluster_offset, core_offset; > + uint32_t pptt_start = table_data->len; > + int n; > AcpiTable table = { .sig = "PPTT", .rev = 2, > .oem_id = oem_id, .oem_table_id = oem_table_id }; > > acpi_table_begin(&table, table_data); > > - for (socket = 0; socket < ms->smp.sockets; socket++) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > - /* > - * Physical package - represents the boundary > - * of a physical package > - */ > - (1 << 0), > - 0, socket, NULL, 0); > - } > + for (n = 0; n < cpus->len; n++) { > + if (cpus->cpus[n].props.socket_id != socket_id) { > + socket_id = cpus->cpus[n].props.socket_id; > + cluster_id = -1; > + core_id = -1; > + socket_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > + (1 << 0), /* Physical package */ > + 0, socket_id, NULL, 0); > + } > > - if (mc->smp_props.clusters_supported) { > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int cluster; > - > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > - (0 << 0), /* not a physical package */ > - parent_offset, cluster, NULL, 0); > + if (mc->smp_props.clusters_supported) { > + if (cpus->cpus[n].props.cluster_id != cluster_id) { > + cluster_id = cpus->cpus[n].props.cluster_id; > + core_id = -1; > + cluster_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > + (0 << 0), /* Not a physical package */ > + socket_offset, cluster_id, NULL, 0); > } > + } else { > + cluster_offset = socket_offset; > } > - } > > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int core; > - > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (core = 0; core < ms->smp.cores; core++) { > - if (ms->smp.threads > 1) { > - g_queue_push_tail(list, > - GUINT_TO_POINTER(table_data->len - pptt_start)); > - build_processor_hierarchy_node( > - table_data, > + if (ms->smp.threads <= 1) { > + 
build_processor_hierarchy_node(table_data, > + (1 << 1) | /* ACPI Processor ID valid */ > + (1 << 3), /* Node is a Leaf */ > + cluster_offset, n, NULL, 0); > + } else { > + if (cpus->cpus[n].props.core_id != core_id) { > + core_id = cpus->cpus[n].props.core_id; > + core_offset = table_data->len - pptt_start; > + build_processor_hierarchy_node(table_data, > (0 << 0), /* not a physical package */ > - parent_offset, core, NULL, 0); > - } else { > - build_processor_hierarchy_node( > - table_data, > - (1 << 1) | /* ACPI Processor ID valid */ > - (1 << 3), /* Node is a Leaf */ > - parent_offset, uid++, NULL, 0); > + cluster_offset, core_id, NULL, 0); > } > - } > - } > - > - length = g_queue_get_length(list); > - for (i = 0; i < length; i++) { > - int thread; > > - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); > - for (thread = 0; thread < ms->smp.threads; thread++) { > - build_processor_hierarchy_node( > - table_data, > + build_processor_hierarchy_node(table_data, > (1 << 1) | /* ACPI Processor ID valid */ > (1 << 2) | /* Processor is a Thread */ > (1 << 3), /* Node is a Leaf */ > - parent_offset, uid++, NULL, 0); > + core_offset, n, NULL, 0); > } > } > > - g_queue_free(list); > acpi_table_end(linker, &table); > } >
Hi Yanan, On 4/14/22 10:56 AM, wangyanan (Y) wrote: > On 2022/4/14 8:33, Gavin Shan wrote: >> On 4/13/22 9:52 PM, Igor Mammedov wrote: >>> On Sun, 3 Apr 2022 22:59:53 +0800 >>> Gavin Shan <gshan@redhat.com> wrote: >>> >>>> When the PPTT table is built, the CPU topology is re-calculated, but >>>> it's unecessary because the CPU topology has been populated in >>>> virt_possible_cpu_arch_ids() on arm/virt machine. >>>> >>>> This reworks build_pptt() to avoid by reusing the existing one in >>>> ms->possible_cpus. Currently, the only user of build_pptt() is >>>> arm/virt machine. >>>> >>>> Signed-off-by: Gavin Shan <gshan@redhat.com> >>>> --- >>>> hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- >>>> 1 file changed, 38 insertions(+), 62 deletions(-) >>>> >>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >>>> index 4086879ebf..4b0f9df3e3 100644 >>>> --- a/hw/acpi/aml-build.c >>>> +++ b/hw/acpi/aml-build.c >>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, >>>> const char *oem_id, const char *oem_table_id) >>>> { >>>> MachineClass *mc = MACHINE_GET_CLASS(ms); >>>> - GQueue *list = g_queue_new(); >>>> - guint pptt_start = table_data->len; >>>> - guint parent_offset; >>>> - guint length, i; >>>> - int uid = 0; >>>> - int socket; >>>> + CPUArchIdList *cpus = ms->possible_cpus; >>>> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; >>>> + uint32_t socket_offset, cluster_offset, core_offset; >>>> + uint32_t pptt_start = table_data->len; >>>> + int n; >>>> AcpiTable table = { .sig = "PPTT", .rev = 2, >>>> .oem_id = oem_id, .oem_table_id = oem_table_id }; >>>> acpi_table_begin(&table, table_data); >>>> - for (socket = 0; socket < ms->smp.sockets; socket++) { >>>> - g_queue_push_tail(list, >>>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>>> - build_processor_hierarchy_node( >>>> - table_data, >>>> - /* >>>> - * Physical package - represents the boundary >>>> - * of a physical package >>>> - */ >>>> - (1 << 0), >>>> - 0, socket, NULL, 0); >>>> - } >>>> + for (n = 0; n < cpus->len; n++) { >>> >>>> + if (cpus->cpus[n].props.socket_id != socket_id) { >>>> + socket_id = cpus->cpus[n].props.socket_id; >>> >>> this relies on cpus->cpus[n].props.*_id being sorted form top to down levels >>> I'd add here and for other container_id an assert() that checks for that >>> specific ID goes in only one direction, to be able to detect when rule is broken. >>> >>> otherwise on may end up with duplicate containers silently. >>> >> >> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids(). >> The only user of build_pptt() is arm/virt machine. So it's fine. However, I think I >> may need add comments for this in v6. >> >> /* >> * This works with the assumption that cpus[n].props.*_id has been >> * sorted from top to down levels in mc->possible_cpu_arch_ids(). >> * Otherwise, the unexpected and duplicate containers will be created. >> */ >> >> The implementation in v3 looks complicated, but comprehensive. 
The one >> in this revision (v6) looks simple, but the we're losing flexibility :) >> >> >>>> + cluster_id = -1; >>>> + core_id = -1; >>>> + socket_offset = table_data->len - pptt_start; >>>> + build_processor_hierarchy_node(table_data, >>>> + (1 << 0), /* Physical package */ >>>> + 0, socket_id, NULL, 0); >>>> + } >>>> - if (mc->smp_props.clusters_supported) { >>>> - length = g_queue_get_length(list); >>>> - for (i = 0; i < length; i++) { >>>> - int cluster; >>>> - >>>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>>> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >>>> - g_queue_push_tail(list, >>>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>>> - build_processor_hierarchy_node( >>>> - table_data, >>>> - (0 << 0), /* not a physical package */ >>>> - parent_offset, cluster, NULL, 0); >>>> + if (mc->smp_props.clusters_supported) { >>>> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >>>> + cluster_id = cpus->cpus[n].props.cluster_id; >>>> + core_id = -1; >>>> + cluster_offset = table_data->len - pptt_start; >>>> + build_processor_hierarchy_node(table_data, >>>> + (0 << 0), /* Not a physical package */ >>>> + socket_offset, cluster_id, NULL, 0); >>>> } >>>> + } else { >>>> + cluster_offset = socket_offset; >>>> } >>>> - } >>>> - length = g_queue_get_length(list); >>>> - for (i = 0; i < length; i++) { >>>> - int core; >>>> - >>>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>>> - for (core = 0; core < ms->smp.cores; core++) { >>>> - if (ms->smp.threads > 1) { >>>> - g_queue_push_tail(list, >>>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>>> - build_processor_hierarchy_node( >>>> - table_data, >>>> + if (ms->smp.threads <= 1) { >>> >>> why <= instead of < is used here? >>> >> >> It's the counterpart to the one in the original implementation, >> which is "if (ms->smp.threads > 1)". the nodes for threads won't >> be created until ms->smp.threads is bigger than 1. If we want >> to use "<" here, then the code will be changed like below in v6. >> However, I really don't think it's necessary. >> >> if (ms->smp.threads < 2) { >> : >> } >> > The smp_parse() guarantees that we either have "one threads" or > "multi threads" in ms->smp. So I think there are two proper choices: > if (ms->smp.threads == 1) { > ... > } else { > ... > } > > or > > if (ms->smp.threads > 1) { > ... > } else { > ... > } > Yes, it's guaranteed ms->smp.threads is equal to or larger than 1. I will use the pattern of 'if (ms->smp.threads == 1)' in v6. 
>>>> + build_processor_hierarchy_node(table_data, >>>> + (1 << 1) | /* ACPI Processor ID valid */ >>>> + (1 << 3), /* Node is a Leaf */ >>>> + cluster_offset, n, NULL, 0); >>>> + } else { >>>> + if (cpus->cpus[n].props.core_id != core_id) { >>>> + core_id = cpus->cpus[n].props.core_id; >>>> + core_offset = table_data->len - pptt_start; >>>> + build_processor_hierarchy_node(table_data, >>>> (0 << 0), /* not a physical package */ >>>> - parent_offset, core, NULL, 0); >>>> - } else { >>>> - build_processor_hierarchy_node( >>>> - table_data, >>>> - (1 << 1) | /* ACPI Processor ID valid */ >>>> - (1 << 3), /* Node is a Leaf */ >>>> - parent_offset, uid++, NULL, 0); >>>> + cluster_offset, core_id, NULL, 0); >>>> } >>>> - } >>>> - } >>>> - >>>> - length = g_queue_get_length(list); >>>> - for (i = 0; i < length; i++) { >>>> - int thread; >>>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>>> - for (thread = 0; thread < ms->smp.threads; thread++) { >>>> - build_processor_hierarchy_node( >>>> - table_data, >>>> + build_processor_hierarchy_node(table_data, >>>> (1 << 1) | /* ACPI Processor ID valid */ >>>> (1 << 2) | /* Processor is a Thread */ >>>> (1 << 3), /* Node is a Leaf */ >>>> - parent_offset, uid++, NULL, 0); >>>> + core_offset, n, NULL, 0); >>>> } >>>> } >>>> - g_queue_free(list); >>>> acpi_table_end(linker, &table); >>>> } Thanks, Gavin
Hi Yanan, On 4/14/22 11:09 AM, wangyanan (Y) wrote: > On 2022/4/3 22:59, Gavin Shan wrote: >> When the PPTT table is built, the CPU topology is re-calculated, but >> it's unecessary because the CPU topology has been populated in >> virt_possible_cpu_arch_ids() on arm/virt machine. >> >> This reworks build_pptt() to avoid by reusing the existing one in >> ms->possible_cpus. Currently, the only user of build_pptt() is >> arm/virt machine. >> >> Signed-off-by: Gavin Shan <gshan@redhat.com> >> --- >> hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- >> 1 file changed, 38 insertions(+), 62 deletions(-) >> >> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >> index 4086879ebf..4b0f9df3e3 100644 >> --- a/hw/acpi/aml-build.c >> +++ b/hw/acpi/aml-build.c >> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, >> const char *oem_id, const char *oem_table_id) >> { >> MachineClass *mc = MACHINE_GET_CLASS(ms); >> - GQueue *list = g_queue_new(); >> - guint pptt_start = table_data->len; >> - guint parent_offset; >> - guint length, i; >> - int uid = 0; >> - int socket; >> + CPUArchIdList *cpus = ms->possible_cpus; >> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; > nit: why not using "int" for the ID variables? (to be consistent with how > the IDs are stored in CpuInstanceProperties). > They're actually "int64_t". CPUInstanceProperty is declared in qemu/build/qapi/qapi-types-machine.h like below: struct CpuInstanceProperties { bool has_node_id; int64_t node_id; bool has_socket_id; int64_t socket_id; bool has_die_id; int64_t die_id; bool has_cluster_id; int64_t cluster_id; bool has_core_id; int64_t core_id; bool has_thread_id; int64_t thread_id; }; I doubt if we're using same configurations to build QEMU. 
I use the following one: configure_qemu() { ./configure --target-list=aarch64-softmmu --enable-numa \ --enable-debug --disable-werror --enable-fdt --enable-kvm \ --disable-mpath --disable-xen --disable-vnc --disable-docs \ --python=/usr/bin/python3 $@ } >> + uint32_t socket_offset, cluster_offset, core_offset; >> + uint32_t pptt_start = table_data->len; >> + int n; >> AcpiTable table = { .sig = "PPTT", .rev = 2, >> .oem_id = oem_id, .oem_table_id = oem_table_id }; >> acpi_table_begin(&table, table_data); >> - for (socket = 0; socket < ms->smp.sockets; socket++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - /* >> - * Physical package - represents the boundary >> - * of a physical package >> - */ >> - (1 << 0), >> - 0, socket, NULL, 0); >> - } >> + for (n = 0; n < cpus->len; n++) { >> + if (cpus->cpus[n].props.socket_id != socket_id) { >> + socket_id = cpus->cpus[n].props.socket_id; >> + cluster_id = -1; >> + core_id = -1; >> + socket_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (1 << 0), /* Physical package */ >> + 0, socket_id, NULL, 0); >> + } >> - if (mc->smp_props.clusters_supported) { >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int cluster; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> - (0 << 0), /* not a physical package */ >> - parent_offset, cluster, NULL, 0); >> + if (mc->smp_props.clusters_supported) { >> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >> + cluster_id = cpus->cpus[n].props.cluster_id; >> + core_id = -1; >> + cluster_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> + (0 << 0), /* Not a physical package */ >> + socket_offset, cluster_id, NULL, 0); >> } >> + } else { >> + cluster_offset = socket_offset; >> } >> - } >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int core; >> - >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (core = 0; core < ms->smp.cores; core++) { >> - if (ms->smp.threads > 1) { >> - g_queue_push_tail(list, >> - GUINT_TO_POINTER(table_data->len - pptt_start)); >> - build_processor_hierarchy_node( >> - table_data, >> + if (ms->smp.threads <= 1) { >> + build_processor_hierarchy_node(table_data, >> + (1 << 1) | /* ACPI Processor ID valid */ >> + (1 << 3), /* Node is a Leaf */ >> + cluster_offset, n, NULL, 0); >> + } else { >> + if (cpus->cpus[n].props.core_id != core_id) { >> + core_id = cpus->cpus[n].props.core_id; >> + core_offset = table_data->len - pptt_start; >> + build_processor_hierarchy_node(table_data, >> (0 << 0), /* not a physical package */ >> - parent_offset, core, NULL, 0); >> - } else { >> - build_processor_hierarchy_node( >> - table_data, >> - (1 << 1) | /* ACPI Processor ID valid */ >> - (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + cluster_offset, core_id, NULL, 0); >> } >> - } >> - } >> - >> - length = g_queue_get_length(list); >> - for (i = 0; i < length; i++) { >> - int thread; >> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >> - for (thread = 0; thread < ms->smp.threads; thread++) { >> - build_processor_hierarchy_node( >> - table_data, >> + build_processor_hierarchy_node(table_data, >> 
(1 << 1) | /* ACPI Processor ID valid */ >> (1 << 2) | /* Processor is a Thread */ >> (1 << 3), /* Node is a Leaf */ >> - parent_offset, uid++, NULL, 0); >> + core_offset, n, NULL, 0); >> } >> } >> - g_queue_free(list); >> acpi_table_end(linker, &table); >> } Thanks, Gavin
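[A contrived sketch of why matching the int64_t width matters; Props is a trimmed stand-in for the QAPI-generated struct quoted above, and the oversized socket_id value is purely illustrative.]

```c
#include <assert.h>
#include <stdint.h>

typedef struct {
    int64_t socket_id;   /* stand-in for the generated int64_t field */
} Props;

int main(void)
{
    Props p = { .socket_id = INT64_C(0x100000000) };  /* contrived value */
    int64_t wide = -1;              /* the -1 sentinel compares losslessly */
    int narrow = (int)p.socket_id;  /* an int local truncates to 0 here */

    assert(p.socket_id != wide);
    assert((int64_t)narrow != p.socket_id);  /* truncation is visible */
    return 0;
}
```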
On 2022/4/14 15:45, Gavin Shan wrote: > Hi Yanan, > > On 4/14/22 11:09 AM, wangyanan (Y) wrote: >> On 2022/4/3 22:59, Gavin Shan wrote: >>> When the PPTT table is built, the CPU topology is re-calculated, but >>> it's unecessary because the CPU topology has been populated in >>> virt_possible_cpu_arch_ids() on arm/virt machine. >>> >>> This reworks build_pptt() to avoid by reusing the existing one in >>> ms->possible_cpus. Currently, the only user of build_pptt() is >>> arm/virt machine. >>> >>> Signed-off-by: Gavin Shan <gshan@redhat.com> >>> --- >>> hw/acpi/aml-build.c | 100 >>> +++++++++++++++++--------------------------- >>> 1 file changed, 38 insertions(+), 62 deletions(-) >>> >>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c >>> index 4086879ebf..4b0f9df3e3 100644 >>> --- a/hw/acpi/aml-build.c >>> +++ b/hw/acpi/aml-build.c >>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, >>> BIOSLinker *linker, MachineState *ms, >>> const char *oem_id, const char *oem_table_id) >>> { >>> MachineClass *mc = MACHINE_GET_CLASS(ms); >>> - GQueue *list = g_queue_new(); >>> - guint pptt_start = table_data->len; >>> - guint parent_offset; >>> - guint length, i; >>> - int uid = 0; >>> - int socket; >>> + CPUArchIdList *cpus = ms->possible_cpus; >>> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; >> nit: why not using "int" for the ID variables? (to be consistent with >> how >> the IDs are stored in CpuInstanceProperties). >> > > They're actually "int64_t". CPUInstanceProperty is declared in > qemu/build/qapi/qapi-types-machine.h like below: > > struct CpuInstanceProperties { > bool has_node_id; > int64_t node_id; > bool has_socket_id; > int64_t socket_id; > bool has_die_id; > int64_t die_id; > bool has_cluster_id; > int64_t cluster_id; > bool has_core_id; > int64_t core_id; > bool has_thread_id; > int64_t thread_id; > }; > Yes, you are right. > I doubt if we're using same configurations to build QEMU. 
I use > the following one: > > configure_qemu() { > ./configure --target-list=aarch64-softmmu --enable-numa \ > --enable-debug --disable-werror --enable-fdt --enable-kvm \ > --disable-mpath --disable-xen --disable-vnc --disable-docs \ > --python=/usr/bin/python3 $@ > } > Thanks, Yanan >>> + uint32_t socket_offset, cluster_offset, core_offset; >>> + uint32_t pptt_start = table_data->len; >>> + int n; >>> AcpiTable table = { .sig = "PPTT", .rev = 2, >>> .oem_id = oem_id, .oem_table_id = >>> oem_table_id }; >>> acpi_table_begin(&table, table_data); >>> - for (socket = 0; socket < ms->smp.sockets; socket++) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> - /* >>> - * Physical package - represents the boundary >>> - * of a physical package >>> - */ >>> - (1 << 0), >>> - 0, socket, NULL, 0); >>> - } >>> + for (n = 0; n < cpus->len; n++) { >>> + if (cpus->cpus[n].props.socket_id != socket_id) { >>> + socket_id = cpus->cpus[n].props.socket_id; >>> + cluster_id = -1; >>> + core_id = -1; >>> + socket_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> + (1 << 0), /* Physical package */ >>> + 0, socket_id, NULL, 0); >>> + } >>> - if (mc->smp_props.clusters_supported) { >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int cluster; >>> - >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (cluster = 0; cluster < ms->smp.clusters; cluster++) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> - (0 << 0), /* not a physical package */ >>> - parent_offset, cluster, NULL, 0); >>> + if (mc->smp_props.clusters_supported) { >>> + if (cpus->cpus[n].props.cluster_id != cluster_id) { >>> + cluster_id = cpus->cpus[n].props.cluster_id; >>> + core_id = -1; >>> + cluster_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> + (0 << 0), /* Not a physical package */ >>> + socket_offset, cluster_id, NULL, 0); >>> } >>> + } else { >>> + cluster_offset = socket_offset; >>> } >>> - } >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int core; >>> - >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (core = 0; core < ms->smp.cores; core++) { >>> - if (ms->smp.threads > 1) { >>> - g_queue_push_tail(list, >>> - GUINT_TO_POINTER(table_data->len - pptt_start)); >>> - build_processor_hierarchy_node( >>> - table_data, >>> + if (ms->smp.threads <= 1) { >>> + build_processor_hierarchy_node(table_data, >>> + (1 << 1) | /* ACPI Processor ID valid */ >>> + (1 << 3), /* Node is a Leaf */ >>> + cluster_offset, n, NULL, 0); >>> + } else { >>> + if (cpus->cpus[n].props.core_id != core_id) { >>> + core_id = cpus->cpus[n].props.core_id; >>> + core_offset = table_data->len - pptt_start; >>> + build_processor_hierarchy_node(table_data, >>> (0 << 0), /* not a physical package */ >>> - parent_offset, core, NULL, 0); >>> - } else { >>> - build_processor_hierarchy_node( >>> - table_data, >>> - (1 << 1) | /* ACPI Processor ID valid */ >>> - (1 << 3), /* Node is a Leaf */ >>> - parent_offset, uid++, NULL, 0); >>> + cluster_offset, core_id, NULL, 0); >>> } >>> - } >>> - } >>> - >>> - length = g_queue_get_length(list); >>> - for (i = 0; i < length; i++) { >>> - int thread; >>> - parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); >>> - for (thread = 0; thread < 
ms->smp.threads; thread++) { >>> - build_processor_hierarchy_node( >>> - table_data, >>> + build_processor_hierarchy_node(table_data, >>> (1 << 1) | /* ACPI Processor ID valid */ >>> (1 << 2) | /* Processor is a Thread */ >>> (1 << 3), /* Node is a Leaf */ >>> - parent_offset, uid++, NULL, 0); >>> + core_offset, n, NULL, 0); >>> } >>> } >>> - g_queue_free(list); >>> acpi_table_end(linker, &table); >>> } > > Thanks, > Gavin > > > .
On Thu, 14 Apr 2022 08:33:29 +0800 Gavin Shan <gshan@redhat.com> wrote: > Hi Igor, > > On 4/13/22 9:52 PM, Igor Mammedov wrote: > > On Sun, 3 Apr 2022 22:59:53 +0800 > > Gavin Shan <gshan@redhat.com> wrote: > > > >> When the PPTT table is built, the CPU topology is re-calculated, but > >> it's unecessary because the CPU topology has been populated in > >> virt_possible_cpu_arch_ids() on arm/virt machine. > >> > >> This reworks build_pptt() to avoid by reusing the existing one in > >> ms->possible_cpus. Currently, the only user of build_pptt() is > >> arm/virt machine. > >> > >> Signed-off-by: Gavin Shan <gshan@redhat.com> > >> --- > >> hw/acpi/aml-build.c | 100 +++++++++++++++++--------------------------- > >> 1 file changed, 38 insertions(+), 62 deletions(-) > >> > >> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c > >> index 4086879ebf..4b0f9df3e3 100644 > >> --- a/hw/acpi/aml-build.c > >> +++ b/hw/acpi/aml-build.c > >> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, > >> const char *oem_id, const char *oem_table_id) > >> { > >> MachineClass *mc = MACHINE_GET_CLASS(ms); > >> - GQueue *list = g_queue_new(); > >> - guint pptt_start = table_data->len; > >> - guint parent_offset; > >> - guint length, i; > >> - int uid = 0; > >> - int socket; > >> + CPUArchIdList *cpus = ms->possible_cpus; > >> + int64_t socket_id = -1, cluster_id = -1, core_id = -1; > >> + uint32_t socket_offset, cluster_offset, core_offset; > >> + uint32_t pptt_start = table_data->len; > >> + int n; > >> AcpiTable table = { .sig = "PPTT", .rev = 2, > >> .oem_id = oem_id, .oem_table_id = oem_table_id }; > >> > >> acpi_table_begin(&table, table_data); > >> > >> - for (socket = 0; socket < ms->smp.sockets; socket++) { > >> - g_queue_push_tail(list, > >> - GUINT_TO_POINTER(table_data->len - pptt_start)); > >> - build_processor_hierarchy_node( > >> - table_data, > >> - /* > >> - * Physical package - represents the boundary > >> - * of a physical package > >> - */ > >> - (1 << 0), > >> - 0, socket, NULL, 0); > >> - } > >> + for (n = 0; n < cpus->len; n++) { > > > >> + if (cpus->cpus[n].props.socket_id != socket_id) { > >> + socket_id = cpus->cpus[n].props.socket_id; > > > > this relies on cpus->cpus[n].props.*_id being sorted form top to down levels > > I'd add here and for other container_id an assert() that checks for that > > specific ID goes in only one direction, to be able to detect when rule is broken. > > > > otherwise on may end up with duplicate containers silently. > > > > Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids(). > The only user of build_pptt() is arm/virt machine. So it's fine. However, I think I > may need add comments for this in v6. > > /* > * This works with the assumption that cpus[n].props.*_id has been > * sorted from top to down levels in mc->possible_cpu_arch_ids(). > * Otherwise, the unexpected and duplicate containers will be created. > */ > > The implementation in v3 looks complicated, but comprehensive. The one > in this revision (v6) looks simple, but the we're losing flexibility :) comment is not enough, as it will break silently that's why I suggested sprinkling asserts() here. [...] > Thanks, > Gavin >
Hi Igor,

On 4/19/22 4:54 PM, Igor Mammedov wrote:
> On Thu, 14 Apr 2022 08:33:29 +0800
> Gavin Shan <gshan@redhat.com> wrote:
>> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>>> On Sun, 3 Apr 2022 22:59:53 +0800
>>> Gavin Shan <gshan@redhat.com> wrote:
>>>
>>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>>> it's unnecessary because the CPU topology has been populated in
>>>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>>>
>>>> This reworks build_pptt() to avoid the re-calculation by reusing the
>>>> existing topology in ms->possible_cpus. Currently, the only user of
>>>> build_pptt() is the arm/virt machine.
>>>>
>>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>>> ---
>>>>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>>  1 file changed, 38 insertions(+), 62 deletions(-)
>>>>
>>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>>> index 4086879ebf..4b0f9df3e3 100644
>>>> --- a/hw/acpi/aml-build.c
>>>> +++ b/hw/acpi/aml-build.c
>>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>>                  const char *oem_id, const char *oem_table_id)
>>>>  {
>>>>      MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>> -    GQueue *list = g_queue_new();
>>>> -    guint pptt_start = table_data->len;
>>>> -    guint parent_offset;
>>>> -    guint length, i;
>>>> -    int uid = 0;
>>>> -    int socket;
>>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>>> +    uint32_t pptt_start = table_data->len;
>>>> +    int n;
>>>>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>>
>>>>      acpi_table_begin(&table, table_data);
>>>>
>>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>>> -        g_queue_push_tail(list,
>>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>>> -        build_processor_hierarchy_node(
>>>> -            table_data,
>>>> -            /*
>>>> -             * Physical package - represents the boundary
>>>> -             * of a physical package
>>>> -             */
>>>> -            (1 << 0),
>>>> -            0, socket, NULL, 0);
>>>> -    }
>>>> +    for (n = 0; n < cpus->len; n++) {
>>>
>>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>>
>>> this relies on cpus->cpus[n].props.*_id being sorted from top to down levels.
>>> I'd add here, and for the other container IDs, an assert() that checks that a
>>> specific ID goes in only one direction, to be able to detect when the rule is
>>> broken.
>>>
>>> otherwise one may end up with duplicate containers silently.
>>>
>>
>> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
>> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think I
>> may need to add comments for this in v6.
>>
>>     /*
>>      * This works with the assumption that cpus[n].props.*_id has been
>>      * sorted from top to down levels in mc->possible_cpu_arch_ids().
>>      * Otherwise, unexpected and duplicate containers will be created.
>>      */
>>
>> The implementation in v3 looks complicated, but comprehensive. The one
>> in this revision (v6) looks simple, but we're losing flexibility :)
>
>
> A comment is not enough, as it will break silently; that's why I suggested
> sprinkling asserts() here.
>

I don't think it breaks anything. Duplicated PPTT entries are allowed in
Linux, at least. The IDs in the duplicated PPTT entries should be the same.
Otherwise, the exposed CPU topology is really broken.

I don't think it's harmful to add the check and assert, so I will introduce
a helper function like below in v7. Sadly, v6 was posted before I received
your confirmation. Igor, could you please check whether the changes, to be
included in v7, look good to you? The complete patch is also attached :)

+static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
+                              bool check_cluster_id, bool check_core_id)
+{
+    CPUArchId *cpus = ms->possible_cpus->cpus;
+    CpuInstanceProperties *t = &cpus[n].props;
+    CpuInstanceProperties *s;
+    bool match;
+    int i;
+
+    for (i = 0; i < n; i++) {
+        match = true;
+        s = &cpus[i].props;
+
+        if (check_socket_id && s->socket_id != t->socket_id) {
+            match = false;
+        }
+
+        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
+            match = false;
+        }
+
+        if (match && check_core_id && s->core_id != t->core_id) {
+            match = false;
+        }
+
+        if (match) {
+            return true;
+        }
+    }
+
+    return false;
+}

The following assert() calls will be applied in build_pptt():

    assert(!pptt_entry_exists(ms, n, true, false, false));   /* socket  */
    assert(!pptt_entry_exists(ms, n, true, true, false));    /* cluster */
    assert(!pptt_entry_exists(ms, n, true,
           mc->smp_props.clusters_supported, true));         /* core    */

Thanks,
Gavin
On Wed, 20 Apr 2022 13:19:34 +0800
Gavin Shan <gshan@redhat.com> wrote:

> Hi Igor,
>
> On 4/19/22 4:54 PM, Igor Mammedov wrote:
> > On Thu, 14 Apr 2022 08:33:29 +0800
> > Gavin Shan <gshan@redhat.com> wrote:
> >> On 4/13/22 9:52 PM, Igor Mammedov wrote:
> >>> On Sun, 3 Apr 2022 22:59:53 +0800
> >>> Gavin Shan <gshan@redhat.com> wrote:
> >>>
> >>>> When the PPTT table is built, the CPU topology is re-calculated, but
> >>>> it's unnecessary because the CPU topology has been populated in
> >>>> virt_possible_cpu_arch_ids() on the arm/virt machine.
> >>>>
> >>>> This reworks build_pptt() to avoid the re-calculation by reusing the
> >>>> existing topology in ms->possible_cpus. Currently, the only user of
> >>>> build_pptt() is the arm/virt machine.
> >>>>
> >>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
> >>>> ---
> >>>>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
> >>>>  1 file changed, 38 insertions(+), 62 deletions(-)
> >>>>
> >>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> >>>> index 4086879ebf..4b0f9df3e3 100644
> >>>> --- a/hw/acpi/aml-build.c
> >>>> +++ b/hw/acpi/aml-build.c
> >>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> >>>>                  const char *oem_id, const char *oem_table_id)
> >>>>  {
> >>>>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> >>>> -    GQueue *list = g_queue_new();
> >>>> -    guint pptt_start = table_data->len;
> >>>> -    guint parent_offset;
> >>>> -    guint length, i;
> >>>> -    int uid = 0;
> >>>> -    int socket;
> >>>> +    CPUArchIdList *cpus = ms->possible_cpus;
> >>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> >>>> +    uint32_t socket_offset, cluster_offset, core_offset;
> >>>> +    uint32_t pptt_start = table_data->len;
> >>>> +    int n;
> >>>>      AcpiTable table = { .sig = "PPTT", .rev = 2,
> >>>>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
> >>>>
> >>>>      acpi_table_begin(&table, table_data);
> >>>>
> >>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> >>>> -        g_queue_push_tail(list,
> >>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> >>>> -        build_processor_hierarchy_node(
> >>>> -            table_data,
> >>>> -            /*
> >>>> -             * Physical package - represents the boundary
> >>>> -             * of a physical package
> >>>> -             */
> >>>> -            (1 << 0),
> >>>> -            0, socket, NULL, 0);
> >>>> -    }
> >>>> +    for (n = 0; n < cpus->len; n++) {
> >>>
> >>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> >>>> +            socket_id = cpus->cpus[n].props.socket_id;
> >>>
> >>> this relies on cpus->cpus[n].props.*_id being sorted from top to down levels.
> >>> I'd add here, and for the other container IDs, an assert() that checks that a
> >>> specific ID goes in only one direction, to be able to detect when the rule is
> >>> broken.
> >>>
> >>> otherwise one may end up with duplicate containers silently.
> >>>
> >>
> >> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
> >> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think I
> >> may need to add comments for this in v6.
> >>
> >>     /*
> >>      * This works with the assumption that cpus[n].props.*_id has been
> >>      * sorted from top to down levels in mc->possible_cpu_arch_ids().
> >>      * Otherwise, unexpected and duplicate containers will be created.
> >>      */
> >>
> >> The implementation in v3 looks complicated, but comprehensive. The one
> >> in this revision (v6) looks simple, but we're losing flexibility :)
> >
> >
> > A comment is not enough, as it will break silently; that's why I suggested
> > sprinkling asserts() here.
> >
>
> I don't think it breaks anything. Duplicated PPTT entries are allowed in
> Linux, at least. The IDs in the duplicated PPTT entries should be the same.
> Otherwise, the exposed CPU topology is really broken.

The spec doesn't say anything about allowing duplicate entries, so I'd rather
avoid that (if you find such a provision in the spec, then put a reference
in this commit message to end the discussion on duplicates).

>
> I don't think it's harmful to add the check and assert, so I will introduce
> a helper function like below in v7. Sadly, v6 was posted before I received
> your confirmation. Igor, could you please check whether the changes, to be
> included in v7, look good to you? The complete patch is also attached :)
>
> +static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
> +                              bool check_cluster_id, bool check_core_id)
> +{
> +    CPUArchId *cpus = ms->possible_cpus->cpus;
> +    CpuInstanceProperties *t = &cpus[n].props;
> +    CpuInstanceProperties *s;
> +    bool match;
> +    int i;
> +
> +    for (i = 0; i < n; i++) {

Wouldn't it make the whole thing O(n^2) in the worst case?

I suggest putting asserts directly into build_pptt() and, considering that
it relies on IDs being sorted, doing something like this:

    assert(foo_id_val > previous_id)

which will ensure that an ID doesn't jump back unexpectedly.

> +        match = true;
> +        s = &cpus[i].props;
> +
> +        if (check_socket_id && s->socket_id != t->socket_id) {
> +            match = false;
> +        }
> +
> +        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
> +            match = false;
> +        }
> +
> +        if (match && check_core_id && s->core_id != t->core_id) {
> +            match = false;
> +        }
> +
> +        if (match) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
>
> The following assert() calls will be applied in build_pptt():
>
>     assert(!pptt_entry_exists(ms, n, true, false, false));   /* socket  */
>     assert(!pptt_entry_exists(ms, n, true, true, false));    /* cluster */
>     assert(!pptt_entry_exists(ms, n, true,
>            mc->smp_props.clusters_supported, true));         /* core    */
>
> Thanks,
> Gavin
>
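[Editorial note: to make the suggested assert pattern concrete, here is a minimal standalone sketch. It is illustrative only, not QEMU code: the Props struct and the sample table stand in for CpuInstanceProperties and ms->possible_cpus, and the loop mirrors the level-change checks from the patch above.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    int64_t socket_id;
    int64_t cluster_id;
    int64_t core_id;
} Props;

int main(void)
{
    /* Sorted from the top level down, as virt_possible_cpu_arch_ids()
     * is expected to produce: 2 sockets x 2 clusters x 2 cores. */
    Props cpus[] = {
        { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 },
        { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
    };
    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
    size_t n;

    for (n = 0; n < sizeof(cpus) / sizeof(cpus[0]); n++) {
        if (cpus[n].socket_id != socket_id) {
            /* An ID may only grow; jumping back means the list is not
             * sorted and a duplicate container would be emitted. */
            assert(cpus[n].socket_id > socket_id);
            socket_id = cpus[n].socket_id;
            cluster_id = -1;
            core_id = -1;
        }
        if (cpus[n].cluster_id != cluster_id) {
            assert(cpus[n].cluster_id > cluster_id);
            cluster_id = cpus[n].cluster_id;
            core_id = -1;
        }
        if (cpus[n].core_id != core_id) {
            assert(cpus[n].core_id > core_id);
            core_id = cpus[n].core_id;
        }
    }

    printf("IDs are sorted as build_pptt() assumes\n");
    return 0;
}

[If a misordered entry were injected, say a socket-0 CPU after the socket-1 CPUs, the socket assert would fire on the spot instead of a second socket-0 container being emitted silently. Each check is O(1), so the whole pass stays O(n).]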
Hi Igor,

On 4/20/22 4:10 PM, Igor Mammedov wrote:
> On Wed, 20 Apr 2022 13:19:34 +0800
> Gavin Shan <gshan@redhat.com> wrote:
>> On 4/19/22 4:54 PM, Igor Mammedov wrote:
>>> On Thu, 14 Apr 2022 08:33:29 +0800
>>> Gavin Shan <gshan@redhat.com> wrote:
>>>> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>>>>> On Sun, 3 Apr 2022 22:59:53 +0800
>>>>> Gavin Shan <gshan@redhat.com> wrote:
>>>>>
>>>>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>>>>> it's unnecessary because the CPU topology has been populated in
>>>>>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>>>>>
>>>>>> This reworks build_pptt() to avoid the re-calculation by reusing the
>>>>>> existing topology in ms->possible_cpus. Currently, the only user of
>>>>>> build_pptt() is the arm/virt machine.
>>>>>>
>>>>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>>>>> ---
>>>>>>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>>>>  1 file changed, 38 insertions(+), 62 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>>>>> index 4086879ebf..4b0f9df3e3 100644
>>>>>> --- a/hw/acpi/aml-build.c
>>>>>> +++ b/hw/acpi/aml-build.c
>>>>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>>>>                  const char *oem_id, const char *oem_table_id)
>>>>>>  {
>>>>>>      MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>>>> -    GQueue *list = g_queue_new();
>>>>>> -    guint pptt_start = table_data->len;
>>>>>> -    guint parent_offset;
>>>>>> -    guint length, i;
>>>>>> -    int uid = 0;
>>>>>> -    int socket;
>>>>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>>>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>>>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>>>>> +    uint32_t pptt_start = table_data->len;
>>>>>> +    int n;
>>>>>>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>>>>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>>>>
>>>>>>      acpi_table_begin(&table, table_data);
>>>>>>
>>>>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>>>>> -        g_queue_push_tail(list,
>>>>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>>>>> -        build_processor_hierarchy_node(
>>>>>> -            table_data,
>>>>>> -            /*
>>>>>> -             * Physical package - represents the boundary
>>>>>> -             * of a physical package
>>>>>> -             */
>>>>>> -            (1 << 0),
>>>>>> -            0, socket, NULL, 0);
>>>>>> -    }
>>>>>> +    for (n = 0; n < cpus->len; n++) {
>>>>>
>>>>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>>>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>>>>
>>>>> this relies on cpus->cpus[n].props.*_id being sorted from top to down levels.
>>>>> I'd add here, and for the other container IDs, an assert() that checks that a
>>>>> specific ID goes in only one direction, to be able to detect when the rule is
>>>>> broken.
>>>>>
>>>>> otherwise one may end up with duplicate containers silently.
>>>>>
>>>>
>>>> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
>>>> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think I
>>>> may need to add comments for this in v6.
>>>>
>>>>     /*
>>>>      * This works with the assumption that cpus[n].props.*_id has been
>>>>      * sorted from top to down levels in mc->possible_cpu_arch_ids().
>>>>      * Otherwise, unexpected and duplicate containers will be created.
>>>>      */
>>>>
>>>> The implementation in v3 looks complicated, but comprehensive. The one
>>>> in this revision (v6) looks simple, but we're losing flexibility :)
>>>
>>>
>>> A comment is not enough, as it will break silently; that's why I suggested
>>> sprinkling asserts() here.
>>>
>>
>> I don't think it breaks anything. Duplicated PPTT entries are allowed in
>> Linux, at least. The IDs in the duplicated PPTT entries should be the same.
>> Otherwise, the exposed CPU topology is really broken.
>
> The spec doesn't say anything about allowing duplicate entries, so I'd rather
> avoid that (if you find such a provision in the spec, then put a reference
> in this commit message to end the discussion on duplicates).
>

Yes, the spec doesn't say it's allowed. I checked how the Linux implementation
parses the PPTT table: duplicate entries won't actually break anything in Linux,
but I'm not sure about other OSes, so I think the check is still needed.

>
>> I don't think it's harmful to add the check and assert, so I will introduce
>> a helper function like below in v7. Sadly, v6 was posted before I received
>> your confirmation. Igor, could you please check whether the changes, to be
>> included in v7, look good to you? The complete patch is also attached :)
>>
>> +static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
>> +                              bool check_cluster_id, bool check_core_id)
>> +{
>> +    CPUArchId *cpus = ms->possible_cpus->cpus;
>> +    CpuInstanceProperties *t = &cpus[n].props;
>> +    CpuInstanceProperties *s;
>> +    bool match;
>> +    int i;
>> +
>> +    for (i = 0; i < n; i++) {
>
> Wouldn't it make the whole thing O(n^2) in the worst case?
>
> I suggest putting asserts directly into build_pptt() and, considering that
> it relies on IDs being sorted, doing something like this:
>
>     assert(foo_id_val > previous_id)
>
> which will ensure that an ID doesn't jump back unexpectedly.
>

Yes, your suggested method is much simpler and more efficient. I will include
it in v7. By the way, please skip v6 and go ahead to review v7 directly. v7
should be posted shortly after some tests :)

>
>> +        match = true;
>> +        s = &cpus[i].props;
>> +
>> +        if (check_socket_id && s->socket_id != t->socket_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match && check_core_id && s->core_id != t->core_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match) {
>> +            return true;
>> +        }
>> +    }
>> +
>> +    return false;
>> +}
>>
>> The following assert() calls will be applied in build_pptt():
>>
>>     assert(!pptt_entry_exists(ms, n, true, false, false));   /* socket  */
>>     assert(!pptt_entry_exists(ms, n, true, true, false));    /* cluster */
>>     assert(!pptt_entry_exists(ms, n, true,
>>            mc->smp_props.clusters_supported, true));         /* core    */
>>

Thanks,
Gavin
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 4086879ebf..4b0f9df3e3 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 const char *oem_id, const char *oem_table_id)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
-    GQueue *list = g_queue_new();
-    guint pptt_start = table_data->len;
-    guint parent_offset;
-    guint length, i;
-    int uid = 0;
-    int socket;
+    CPUArchIdList *cpus = ms->possible_cpus;
+    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
+    uint32_t socket_offset, cluster_offset, core_offset;
+    uint32_t pptt_start = table_data->len;
+    int n;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
 
     acpi_table_begin(&table, table_data);
 
-    for (socket = 0; socket < ms->smp.sockets; socket++) {
-        g_queue_push_tail(list,
-            GUINT_TO_POINTER(table_data->len - pptt_start));
-        build_processor_hierarchy_node(
-            table_data,
-            /*
-             * Physical package - represents the boundary
-             * of a physical package
-             */
-            (1 << 0),
-            0, socket, NULL, 0);
-    }
+    for (n = 0; n < cpus->len; n++) {
+        if (cpus->cpus[n].props.socket_id != socket_id) {
+            socket_id = cpus->cpus[n].props.socket_id;
+            cluster_id = -1;
+            core_id = -1;
+            socket_offset = table_data->len - pptt_start;
+            build_processor_hierarchy_node(table_data,
+                (1 << 0), /* Physical package */
+                0, socket_id, NULL, 0);
+        }
 
-    if (mc->smp_props.clusters_supported) {
-        length = g_queue_get_length(list);
-        for (i = 0; i < length; i++) {
-            int cluster;
-
-            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
-                g_queue_push_tail(list,
-                    GUINT_TO_POINTER(table_data->len - pptt_start));
-                build_processor_hierarchy_node(
-                    table_data,
-                    (0 << 0), /* not a physical package */
-                    parent_offset, cluster, NULL, 0);
+        if (mc->smp_props.clusters_supported) {
+            if (cpus->cpus[n].props.cluster_id != cluster_id) {
+                cluster_id = cpus->cpus[n].props.cluster_id;
+                core_id = -1;
+                cluster_offset = table_data->len - pptt_start;
+                build_processor_hierarchy_node(table_data,
+                    (0 << 0), /* Not a physical package */
+                    socket_offset, cluster_id, NULL, 0);
             }
+        } else {
+            cluster_offset = socket_offset;
         }
-    }
 
-    length = g_queue_get_length(list);
-    for (i = 0; i < length; i++) {
-        int core;
-
-        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (core = 0; core < ms->smp.cores; core++) {
-            if (ms->smp.threads > 1) {
-                g_queue_push_tail(list,
-                    GUINT_TO_POINTER(table_data->len - pptt_start));
-                build_processor_hierarchy_node(
-                    table_data,
+        if (ms->smp.threads <= 1) {
+            build_processor_hierarchy_node(table_data,
+                (1 << 1) | /* ACPI Processor ID valid */
+                (1 << 3),  /* Node is a Leaf */
+                cluster_offset, n, NULL, 0);
+        } else {
+            if (cpus->cpus[n].props.core_id != core_id) {
+                core_id = cpus->cpus[n].props.core_id;
+                core_offset = table_data->len - pptt_start;
+                build_processor_hierarchy_node(table_data,
                     (0 << 0), /* not a physical package */
-                    parent_offset, core, NULL, 0);
-            } else {
-                build_processor_hierarchy_node(
-                    table_data,
-                    (1 << 1) | /* ACPI Processor ID valid */
-                    (1 << 3), /* Node is a Leaf */
-                    parent_offset, uid++, NULL, 0);
+                    cluster_offset, core_id, NULL, 0);
             }
-        }
-    }
-
-    length = g_queue_get_length(list);
-    for (i = 0; i < length; i++) {
-        int thread;
 
-        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (thread = 0; thread < ms->smp.threads; thread++) {
-            build_processor_hierarchy_node(
-                table_data,
+            build_processor_hierarchy_node(table_data,
                 (1 << 1) | /* ACPI Processor ID valid */
                 (1 << 2) | /* Processor is a Thread */
                 (1 << 3), /* Node is a Leaf */
-                parent_offset, uid++, NULL, 0);
+                core_offset, n, NULL, 0);
         }
     }
 
-    g_queue_free(list);
     acpi_table_end(linker, &table);
 }
When the PPTT table is built, the CPU topology is re-calculated, but it's
unnecessary because the CPU topology has been populated in
virt_possible_cpu_arch_ids() on the arm/virt machine.

This reworks build_pptt() to avoid the re-calculation by reusing the
existing topology in ms->possible_cpus. Currently, the only user of
build_pptt() is the arm/virt machine.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 62 deletions(-)
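[Editorial note: to see the emission order the reworked loop produces, here is a small standalone walk. It is an illustration only, not QEMU code: the Props struct and the 2-socket x 2-cluster x 1-core x 2-thread topology are made up, and the printfs stand in for build_processor_hierarchy_node() calls. It assumes clusters are supported and ms->smp.threads > 1, so threads become the leaf nodes.]

#include <stdint.h>
#include <stdio.h>

typedef struct {
    int64_t socket_id;
    int64_t cluster_id;
    int64_t core_id;
} Props;

int main(void)
{
    /* Hypothetical sorted possible_cpus list, top level down. */
    Props cpus[8];
    int len = 0;

    for (int64_t s = 0; s < 2; s++) {
        for (int64_t cl = 0; cl < 2; cl++) {
            for (int64_t t = 0; t < 2; t++) {
                cpus[len++] = (Props){ s, cl, 0 };
            }
        }
    }

    int64_t socket_id = -1, cluster_id = -1, core_id = -1;

    for (int n = 0; n < len; n++) {
        /* One container node per level change, exactly as in the patch. */
        if (cpus[n].socket_id != socket_id) {
            socket_id = cpus[n].socket_id;
            cluster_id = -1;
            core_id = -1;
            printf("package (socket %lld)\n", (long long)socket_id);
        }
        if (cpus[n].cluster_id != cluster_id) {
            cluster_id = cpus[n].cluster_id;
            core_id = -1;
            printf("  cluster %lld\n", (long long)cluster_id);
        }
        if (cpus[n].core_id != core_id) {
            core_id = cpus[n].core_id;
            printf("    core %lld\n", (long long)core_id);
        }
        /* One leaf per possible CPU; its index doubles as the ACPI UID. */
        printf("      thread leaf, ACPI processor UID %d\n", n);
    }
    return 0;
}

[Because each container is emitted only when its ID changes, the single pass over the sorted list yields the same tree the old per-level GQueue loops built, which is exactly why the sortedness asserts discussed above matter.]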