Message ID | 1502846811-32360-1-git-send-email-douly.fnst@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, 16 Aug 2017 09:26:51 +0800 Dou Liyang <douly.fnst@cn.fujitsu.com> wrote: > Currently, Using the fisrt node without memory on the machine makes > QEMU unhappy. With this example command line: > ... \ > -m 1024M,slots=4,maxmem=32G \ > -numa node,nodeid=0 \ > -numa node,mem=1024M,nodeid=1 \ > -numa node,nodeid=2 \ > -numa node,nodeid=3 \ > Guest reports "No NUMA configuration found" and the NUMA topology is > wrong. > > This is because when QEMU builds ACPI SRAT, it regards node0 as the > default node to deal with the memory hole(640K-1M). this means the > node0 must have some memory(>1M), but, actually it can have no > memory. > > Fix this problem by replace the node0 with the first node which has > memory on it. Add a new function for each node. Also do some cleanup. It seems harmless but one never knows for sure, could you test it with different guests including old windows (up to XP)/ linux (2.6 stable kernel) versions? > > Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> > --- > V2 --> V1: > -Fix a coding style problem > Replace > for (node = 0; > node < pcms->numa_nodes && pcms->node_mem[node] == 0; > node++); > > with > for (node = 0; node < pcms->numa_nodes; node++) { > if (pcms->node_mem[node] != 0) { > break; > } > > hw/i386/acpi-build.c | 78 +++++++++++++++++++++++++++++++++------------------- > 1 file changed, 50 insertions(+), 28 deletions(-) > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 98dd424..f93d712 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2318,15 +2318,43 @@ build_tpm2(GArray *table_data, BIOSLinker *linker) > (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); > } > > +static uint64_t > +build_srat_node_entry(GArray *table_data, PCMachineState *pcms, > + int i, uint64_t mem_base, uint64_t mem_len) > +{ > + AcpiSratMemoryAffinity *numamem; > + uint64_t next_base; > + > + next_base = mem_base + mem_len; > + > + /* Cut out the ACPI_PCI hole */ > + if (mem_base <= pcms->below_4g_mem_size && > + next_base > pcms->below_4g_mem_size) { > + mem_len -= next_base - pcms->below_4g_mem_size; > + if (mem_len > 0) { > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, mem_base, mem_len, i, > + MEM_AFFINITY_ENABLED); > + } > + mem_base = 1ULL << 32; > + mem_len = next_base - pcms->below_4g_mem_size; > + next_base += (1ULL << 32) - pcms->below_4g_mem_size; > + } > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, mem_base, mem_len, i, > + MEM_AFFINITY_ENABLED); > + return next_base; > +} > + > static void > build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > { > AcpiSystemResourceAffinityTable *srat; > AcpiSratMemoryAffinity *numamem; > > - int i; > + int i, node; > int srat_start, numa_start, slots; > - uint64_t mem_len, mem_base, next_base; > + uint64_t mem_len, mem_base; > MachineClass *mc = MACHINE_GET_CLASS(machine); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); > PCMachineState *pcms = PC_MACHINE(machine); > @@ -2370,36 +2398,30 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > /* the memory map is a bit tricky, it contains at least one hole > * from 640k-1M and possibly another one from 3.5G-4G. > */ > - next_base = 0; > + > numa_start = table_data->len; > > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, 0, 640 * 1024, 0, MEM_AFFINITY_ENABLED); > - next_base = 1024 * 1024; > - for (i = 1; i < pcms->numa_nodes + 1; ++i) { > - mem_base = next_base; > - mem_len = pcms->node_mem[i - 1]; > - if (i == 1) { > - mem_len -= 1024 * 1024; > + /* get the first node which has memory and map the hole from 640K-1M */ > + for (node = 0; node < pcms->numa_nodes; node++) { > + if (pcms->node_mem[node] != 0) { > + break; > } > - next_base = mem_base + mem_len; > - > - /* Cut out the ACPI_PCI hole */ > - if (mem_base <= pcms->below_4g_mem_size && > - next_base > pcms->below_4g_mem_size) { > - mem_len -= next_base - pcms->below_4g_mem_size; > - if (mem_len > 0) { > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > - } > - mem_base = 1ULL << 32; > - mem_len = next_base - pcms->below_4g_mem_size; > - next_base += (1ULL << 32) - pcms->below_4g_mem_size; > + } > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, 0, 640 * 1024, node, MEM_AFFINITY_ENABLED); > + > + /* map the rest of memory from 1M */ > + mem_base = 1024 * 1024; > + mem_len = pcms->node_mem[node] - mem_base; > + mem_base = build_srat_node_entry(table_data, pcms, node, > + mem_base, mem_len); > + > + for (i = 0; i < pcms->numa_nodes; i++) { > + if (i == node) { > + continue; > } > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > + mem_base = build_srat_node_entry(table_data, pcms, i, > + mem_base, pcms->node_mem[i]); > } > slots = (table_data->len - numa_start) / sizeof *numamem; > for (; slots < pcms->numa_nodes + 2; slots++) {
Hi Igor, At 08/16/2017 05:18 PM, Igor Mammedov wrote: > On Wed, 16 Aug 2017 09:26:51 +0800 > Dou Liyang <douly.fnst@cn.fujitsu.com> wrote: > >> Currently, Using the fisrt node without memory on the machine makes >> QEMU unhappy. With this example command line: >> ... \ >> -m 1024M,slots=4,maxmem=32G \ >> -numa node,nodeid=0 \ >> -numa node,mem=1024M,nodeid=1 \ >> -numa node,nodeid=2 \ >> -numa node,nodeid=3 \ >> Guest reports "No NUMA configuration found" and the NUMA topology is >> wrong. >> >> This is because when QEMU builds ACPI SRAT, it regards node0 as the >> default node to deal with the memory hole(640K-1M). this means the >> node0 must have some memory(>1M), but, actually it can have no >> memory. >> >> Fix this problem by replace the node0 with the first node which has >> memory on it. Add a new function for each node. Also do some cleanup. > It seems harmless but one never knows for sure, > could you test it with different guests including old windows (up to XP)/ > linux (2.6 stable kernel) versions? > I have test it with the following guests: windows 7. linux 4.13-rc5. will test it in linux (2.6 and 3.10 stable kernel). Thanks, dou. >> >> Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> >> --- >> V2 --> V1: >> -Fix a coding style problem >> Replace >> for (node = 0; >> node < pcms->numa_nodes && pcms->node_mem[node] == 0; >> node++); >> >> with >> for (node = 0; node < pcms->numa_nodes; node++) { >> if (pcms->node_mem[node] != 0) { >> break; >> } >> >> hw/i386/acpi-build.c | 78 +++++++++++++++++++++++++++++++++------------------- >> 1 file changed, 50 insertions(+), 28 deletions(-) >> >> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c >> index 98dd424..f93d712 100644 >> --- a/hw/i386/acpi-build.c >> +++ b/hw/i386/acpi-build.c >> @@ -2318,15 +2318,43 @@ build_tpm2(GArray *table_data, BIOSLinker *linker) >> (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); >> } >> >> +static uint64_t >> +build_srat_node_entry(GArray *table_data, PCMachineState *pcms, >> + int i, uint64_t mem_base, uint64_t mem_len) >> +{ >> + AcpiSratMemoryAffinity *numamem; >> + uint64_t next_base; >> + >> + next_base = mem_base + mem_len; >> + >> + /* Cut out the ACPI_PCI hole */ >> + if (mem_base <= pcms->below_4g_mem_size && >> + next_base > pcms->below_4g_mem_size) { >> + mem_len -= next_base - pcms->below_4g_mem_size; >> + if (mem_len > 0) { >> + numamem = acpi_data_push(table_data, sizeof *numamem); >> + build_srat_memory(numamem, mem_base, mem_len, i, >> + MEM_AFFINITY_ENABLED); >> + } >> + mem_base = 1ULL << 32; >> + mem_len = next_base - pcms->below_4g_mem_size; >> + next_base += (1ULL << 32) - pcms->below_4g_mem_size; >> + } >> + numamem = acpi_data_push(table_data, sizeof *numamem); >> + build_srat_memory(numamem, mem_base, mem_len, i, >> + MEM_AFFINITY_ENABLED); >> + return next_base; >> +} >> + >> static void >> build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> { >> AcpiSystemResourceAffinityTable *srat; >> AcpiSratMemoryAffinity *numamem; >> >> - int i; >> + int i, node; >> int srat_start, numa_start, slots; >> - uint64_t mem_len, mem_base, next_base; >> + uint64_t mem_len, mem_base; >> MachineClass *mc = MACHINE_GET_CLASS(machine); >> const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); >> PCMachineState *pcms = PC_MACHINE(machine); >> @@ -2370,36 +2398,30 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) >> /* the memory map is a bit tricky, it contains at least one hole >> * from 640k-1M and possibly another one from 3.5G-4G. >> */ >> - next_base = 0; >> + >> numa_start = table_data->len; >> >> - numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, 0, 640 * 1024, 0, MEM_AFFINITY_ENABLED); >> - next_base = 1024 * 1024; >> - for (i = 1; i < pcms->numa_nodes + 1; ++i) { >> - mem_base = next_base; >> - mem_len = pcms->node_mem[i - 1]; >> - if (i == 1) { >> - mem_len -= 1024 * 1024; >> + /* get the first node which has memory and map the hole from 640K-1M */ >> + for (node = 0; node < pcms->numa_nodes; node++) { >> + if (pcms->node_mem[node] != 0) { >> + break; >> } >> - next_base = mem_base + mem_len; >> - >> - /* Cut out the ACPI_PCI hole */ >> - if (mem_base <= pcms->below_4g_mem_size && >> - next_base > pcms->below_4g_mem_size) { >> - mem_len -= next_base - pcms->below_4g_mem_size; >> - if (mem_len > 0) { >> - numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, mem_base, mem_len, i - 1, >> - MEM_AFFINITY_ENABLED); >> - } >> - mem_base = 1ULL << 32; >> - mem_len = next_base - pcms->below_4g_mem_size; >> - next_base += (1ULL << 32) - pcms->below_4g_mem_size; >> + } >> + numamem = acpi_data_push(table_data, sizeof *numamem); >> + build_srat_memory(numamem, 0, 640 * 1024, node, MEM_AFFINITY_ENABLED); >> + >> + /* map the rest of memory from 1M */ >> + mem_base = 1024 * 1024; >> + mem_len = pcms->node_mem[node] - mem_base; >> + mem_base = build_srat_node_entry(table_data, pcms, node, >> + mem_base, mem_len); >> + >> + for (i = 0; i < pcms->numa_nodes; i++) { >> + if (i == node) { >> + continue; >> } >> - numamem = acpi_data_push(table_data, sizeof *numamem); >> - build_srat_memory(numamem, mem_base, mem_len, i - 1, >> - MEM_AFFINITY_ENABLED); >> + mem_base = build_srat_node_entry(table_data, pcms, i, >> + mem_base, pcms->node_mem[i]); >> } >> slots = (table_data->len - numa_start) / sizeof *numamem; >> for (; slots < pcms->numa_nodes + 2; slots++) { > > > >
On Wed, Aug 16, 2017 at 11:18:07AM +0200, Igor Mammedov wrote: > On Wed, 16 Aug 2017 09:26:51 +0800 > Dou Liyang <douly.fnst@cn.fujitsu.com> wrote: > > > Currently, Using the fisrt node without memory on the machine makes > > QEMU unhappy. With this example command line: > > ... \ > > -m 1024M,slots=4,maxmem=32G \ > > -numa node,nodeid=0 \ > > -numa node,mem=1024M,nodeid=1 \ > > -numa node,nodeid=2 \ > > -numa node,nodeid=3 \ > > Guest reports "No NUMA configuration found" and the NUMA topology is > > wrong. > > > > This is because when QEMU builds ACPI SRAT, it regards node0 as the > > default node to deal with the memory hole(640K-1M). this means the > > node0 must have some memory(>1M), but, actually it can have no > > memory. > > > > Fix this problem by replace the node0 with the first node which has > > memory on it. Add a new function for each node. Also do some cleanup. > It seems harmless but one never knows for sure, > could you test it with different guests including old windows (up to XP)/ > linux (2.6 stable kernel) versions? This patch is supposed to affect only the cases where there's no RAM configured on node 0. I won't be surprised if some guest OSes don't like it, but in this case the solution is to not configure the VM that way. That means I don't think we really need to test ancient OSes if we ensure there are no ACPI table changes on the existing known-to-work configurations.
On Wed, Aug 16, 2017 at 09:26:51AM +0800, Dou Liyang wrote: > Currently, Using the fisrt node without memory on the machine makes > QEMU unhappy. With this example command line: > ... \ > -m 1024M,slots=4,maxmem=32G \ > -numa node,nodeid=0 \ > -numa node,mem=1024M,nodeid=1 \ > -numa node,nodeid=2 \ > -numa node,nodeid=3 \ > Guest reports "No NUMA configuration found" and the NUMA topology is > wrong. > > This is because when QEMU builds ACPI SRAT, it regards node0 as the > default node to deal with the memory hole(640K-1M). this means the > node0 must have some memory(>1M), but, actually it can have no > memory. > > Fix this problem by replace the node0 with the first node which has > memory on it. Add a new function for each node. Also do some cleanup. > > Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> This isn't a regression, is it? If so I think it's safe to postpone this to 2.11. > --- > V2 --> V1: > -Fix a coding style problem > Replace > for (node = 0; > node < pcms->numa_nodes && pcms->node_mem[node] == 0; > node++); > > with > for (node = 0; node < pcms->numa_nodes; node++) { > if (pcms->node_mem[node] != 0) { > break; > } > > hw/i386/acpi-build.c | 78 +++++++++++++++++++++++++++++++++------------------- > 1 file changed, 50 insertions(+), 28 deletions(-) > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c > index 98dd424..f93d712 100644 > --- a/hw/i386/acpi-build.c > +++ b/hw/i386/acpi-build.c > @@ -2318,15 +2318,43 @@ build_tpm2(GArray *table_data, BIOSLinker *linker) > (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); > } > > +static uint64_t > +build_srat_node_entry(GArray *table_data, PCMachineState *pcms, > + int i, uint64_t mem_base, uint64_t mem_len) > +{ > + AcpiSratMemoryAffinity *numamem; > + uint64_t next_base; > + > + next_base = mem_base + mem_len; > + > + /* Cut out the ACPI_PCI hole */ > + if (mem_base <= pcms->below_4g_mem_size && > + next_base > pcms->below_4g_mem_size) { > + mem_len -= next_base - pcms->below_4g_mem_size; > + if (mem_len > 0) { > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, mem_base, mem_len, i, > + MEM_AFFINITY_ENABLED); > + } > + mem_base = 1ULL << 32; > + mem_len = next_base - pcms->below_4g_mem_size; > + next_base += (1ULL << 32) - pcms->below_4g_mem_size; > + } > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, mem_base, mem_len, i, > + MEM_AFFINITY_ENABLED); > + return next_base; > +} > + > static void > build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > { > AcpiSystemResourceAffinityTable *srat; > AcpiSratMemoryAffinity *numamem; > > - int i; > + int i, node; > int srat_start, numa_start, slots; > - uint64_t mem_len, mem_base, next_base; > + uint64_t mem_len, mem_base; > MachineClass *mc = MACHINE_GET_CLASS(machine); > const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); > PCMachineState *pcms = PC_MACHINE(machine); > @@ -2370,36 +2398,30 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) > /* the memory map is a bit tricky, it contains at least one hole > * from 640k-1M and possibly another one from 3.5G-4G. > */ > - next_base = 0; > + > numa_start = table_data->len; > > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, 0, 640 * 1024, 0, MEM_AFFINITY_ENABLED); > - next_base = 1024 * 1024; > - for (i = 1; i < pcms->numa_nodes + 1; ++i) { > - mem_base = next_base; > - mem_len = pcms->node_mem[i - 1]; > - if (i == 1) { > - mem_len -= 1024 * 1024; > + /* get the first node which has memory and map the hole from 640K-1M */ > + for (node = 0; node < pcms->numa_nodes; node++) { > + if (pcms->node_mem[node] != 0) { > + break; > } > - next_base = mem_base + mem_len; > - > - /* Cut out the ACPI_PCI hole */ > - if (mem_base <= pcms->below_4g_mem_size && > - next_base > pcms->below_4g_mem_size) { > - mem_len -= next_base - pcms->below_4g_mem_size; > - if (mem_len > 0) { > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > - } > - mem_base = 1ULL << 32; > - mem_len = next_base - pcms->below_4g_mem_size; > - next_base += (1ULL << 32) - pcms->below_4g_mem_size; > + } > + numamem = acpi_data_push(table_data, sizeof *numamem); > + build_srat_memory(numamem, 0, 640 * 1024, node, MEM_AFFINITY_ENABLED); > + > + /* map the rest of memory from 1M */ > + mem_base = 1024 * 1024; > + mem_len = pcms->node_mem[node] - mem_base; > + mem_base = build_srat_node_entry(table_data, pcms, node, > + mem_base, mem_len); > + > + for (i = 0; i < pcms->numa_nodes; i++) { > + if (i == node) { > + continue; > } > - numamem = acpi_data_push(table_data, sizeof *numamem); > - build_srat_memory(numamem, mem_base, mem_len, i - 1, > - MEM_AFFINITY_ENABLED); > + mem_base = build_srat_node_entry(table_data, pcms, i, > + mem_base, pcms->node_mem[i]); > } > slots = (table_data->len - numa_start) / sizeof *numamem; > for (; slots < pcms->numa_nodes + 2; slots++) { > -- > 2.5.5 > >
On Fri, Aug 18, 2017 at 09:28:55PM +0300, Michael S. Tsirkin wrote: > On Wed, Aug 16, 2017 at 09:26:51AM +0800, Dou Liyang wrote: > > Currently, Using the fisrt node without memory on the machine makes > > QEMU unhappy. With this example command line: > > ... \ > > -m 1024M,slots=4,maxmem=32G \ > > -numa node,nodeid=0 \ > > -numa node,mem=1024M,nodeid=1 \ > > -numa node,nodeid=2 \ > > -numa node,nodeid=3 \ > > Guest reports "No NUMA configuration found" and the NUMA topology is > > wrong. > > > > This is because when QEMU builds ACPI SRAT, it regards node0 as the > > default node to deal with the memory hole(640K-1M). this means the > > node0 must have some memory(>1M), but, actually it can have no > > memory. > > > > Fix this problem by replace the node0 with the first node which has > > memory on it. Add a new function for each node. Also do some cleanup. > > > > Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> > > This isn't a regression, is it? > If so I think it's safe to postpone this to 2.11. Agreed. I was already treating it as a candidate for 2.11 only.
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 98dd424..f93d712 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2318,15 +2318,43 @@ build_tpm2(GArray *table_data, BIOSLinker *linker) (void *)tpm2_ptr, "TPM2", sizeof(*tpm2_ptr), 4, NULL, NULL); } +static uint64_t +build_srat_node_entry(GArray *table_data, PCMachineState *pcms, + int i, uint64_t mem_base, uint64_t mem_len) +{ + AcpiSratMemoryAffinity *numamem; + uint64_t next_base; + + next_base = mem_base + mem_len; + + /* Cut out the ACPI_PCI hole */ + if (mem_base <= pcms->below_4g_mem_size && + next_base > pcms->below_4g_mem_size) { + mem_len -= next_base - pcms->below_4g_mem_size; + if (mem_len > 0) { + numamem = acpi_data_push(table_data, sizeof *numamem); + build_srat_memory(numamem, mem_base, mem_len, i, + MEM_AFFINITY_ENABLED); + } + mem_base = 1ULL << 32; + mem_len = next_base - pcms->below_4g_mem_size; + next_base += (1ULL << 32) - pcms->below_4g_mem_size; + } + numamem = acpi_data_push(table_data, sizeof *numamem); + build_srat_memory(numamem, mem_base, mem_len, i, + MEM_AFFINITY_ENABLED); + return next_base; +} + static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { AcpiSystemResourceAffinityTable *srat; AcpiSratMemoryAffinity *numamem; - int i; + int i, node; int srat_start, numa_start, slots; - uint64_t mem_len, mem_base, next_base; + uint64_t mem_len, mem_base; MachineClass *mc = MACHINE_GET_CLASS(machine); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine); PCMachineState *pcms = PC_MACHINE(machine); @@ -2370,36 +2398,30 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) /* the memory map is a bit tricky, it contains at least one hole * from 640k-1M and possibly another one from 3.5G-4G. */ - next_base = 0; + numa_start = table_data->len; - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, 0, 640 * 1024, 0, MEM_AFFINITY_ENABLED); - next_base = 1024 * 1024; - for (i = 1; i < pcms->numa_nodes + 1; ++i) { - mem_base = next_base; - mem_len = pcms->node_mem[i - 1]; - if (i == 1) { - mem_len -= 1024 * 1024; + /* get the first node which has memory and map the hole from 640K-1M */ + for (node = 0; node < pcms->numa_nodes; node++) { + if (pcms->node_mem[node] != 0) { + break; } - next_base = mem_base + mem_len; - - /* Cut out the ACPI_PCI hole */ - if (mem_base <= pcms->below_4g_mem_size && - next_base > pcms->below_4g_mem_size) { - mem_len -= next_base - pcms->below_4g_mem_size; - if (mem_len > 0) { - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); - } - mem_base = 1ULL << 32; - mem_len = next_base - pcms->below_4g_mem_size; - next_base += (1ULL << 32) - pcms->below_4g_mem_size; + } + numamem = acpi_data_push(table_data, sizeof *numamem); + build_srat_memory(numamem, 0, 640 * 1024, node, MEM_AFFINITY_ENABLED); + + /* map the rest of memory from 1M */ + mem_base = 1024 * 1024; + mem_len = pcms->node_mem[node] - mem_base; + mem_base = build_srat_node_entry(table_data, pcms, node, + mem_base, mem_len); + + for (i = 0; i < pcms->numa_nodes; i++) { + if (i == node) { + continue; } - numamem = acpi_data_push(table_data, sizeof *numamem); - build_srat_memory(numamem, mem_base, mem_len, i - 1, - MEM_AFFINITY_ENABLED); + mem_base = build_srat_node_entry(table_data, pcms, i, + mem_base, pcms->node_mem[i]); } slots = (table_data->len - numa_start) / sizeof *numamem; for (; slots < pcms->numa_nodes + 2; slots++) {
Currently, Using the fisrt node without memory on the machine makes QEMU unhappy. With this example command line: ... \ -m 1024M,slots=4,maxmem=32G \ -numa node,nodeid=0 \ -numa node,mem=1024M,nodeid=1 \ -numa node,nodeid=2 \ -numa node,nodeid=3 \ Guest reports "No NUMA configuration found" and the NUMA topology is wrong. This is because when QEMU builds ACPI SRAT, it regards node0 as the default node to deal with the memory hole(640K-1M). this means the node0 must have some memory(>1M), but, actually it can have no memory. Fix this problem by replace the node0 with the first node which has memory on it. Add a new function for each node. Also do some cleanup. Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> --- V2 --> V1: -Fix a coding style problem Replace for (node = 0; node < pcms->numa_nodes && pcms->node_mem[node] == 0; node++); with for (node = 0; node < pcms->numa_nodes; node++) { if (pcms->node_mem[node] != 0) { break; } hw/i386/acpi-build.c | 78 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 28 deletions(-)