@@ -233,6 +233,11 @@ typedef struct {
* is not needed.
* @numa_mem_supported:
* true if '--numa node.mem' option is supported and false otherwise
+ * @numa_skip_ram_container:
+ * If false, numa memory init creates the MachineState.ram memory region
+ * with all numa node regions packed densely within it. If true, the .ram
+ * region is not created. Machines can use this, e.g., to place NUMA
+ * regions sparsely within the address space.
* @hotplug_allowed:
* If the hook is provided, then it'll be called for each device
* hotplug to check whether the device hotplug is allowed. Return
@@ -311,6 +316,7 @@ struct MachineClass {
bool nvdimm_supported;
bool numa_mem_supported;
bool auto_enable_numa;
+ bool numa_skip_ram_container;
bool cpu_cluster_has_numa_boundary;
SMPCompatProps smp_props;
const char *default_ram_id;
@@ -38,6 +38,7 @@ enum {
typedef struct NodeInfo {
uint64_t node_mem;
struct HostMemoryBackend *node_memdev;
+ MemoryRegion *node_mr;
bool present;
bool has_cpu;
bool has_gi;
@@ -623,19 +623,46 @@ static void complete_init_numa_distance(MachineState *ms)
}
}
-static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram)
+/*
+ * Consume all NUMA memory backends and store the regions in NodeInfo.node_mr.
+ */
+static void numa_init_memdev(MachineState *ms)
{
int i;
- uint64_t addr = 0;
for (i = 0; i < ms->numa_state->num_nodes; i++) {
- uint64_t size = ms->numa_state->nodes[i].node_mem;
HostMemoryBackend *backend = ms->numa_state->nodes[i].node_memdev;
if (!backend) {
continue;
}
MemoryRegion *seg = machine_consume_memdev(ms, backend);
- memory_region_add_subregion(ram, addr, seg);
+ ms->numa_state->nodes[i].node_mr = seg;
+ }
+}
+
+/*
+ * Consume all NUMA memory backends as with numa_init_memdev, packing them
+ * densely into a MachineState.ram "container" region.
+ */
+static void numa_init_memdev_container(MachineState *ms)
+{
+ int i;
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+ uint64_t addr = 0;
+
+ ms->ram = g_new(MemoryRegion, 1);
+ memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
+ ms->ram_size);
+
+ numa_init_memdev(ms);
+
+ for (i = 0; i < ms->numa_state->num_nodes; i++) {
+ uint64_t size = ms->numa_state->nodes[i].node_mem;
+ MemoryRegion *seg = ms->numa_state->nodes[i].node_mr;
+ if (!seg) {
+ continue;
+ }
+ memory_region_add_subregion(ms->ram, addr, seg);
addr += size;
}
}
@@ -706,10 +733,11 @@ void numa_complete_configuration(MachineState *ms)
" properties are mutually exclusive");
exit(1);
}
- ms->ram = g_new(MemoryRegion, 1);
- memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
- ms->ram_size);
- numa_init_memdev_container(ms, ms->ram);
+ if (mc->numa_skip_ram_container) {
+ numa_init_memdev(ms);
+ } else {
+ numa_init_memdev_container(ms);
+ }
}
/* QEMU needs at least all unique node pair distances to build
* the whole NUMA distance table. QEMU treats the distance table
NUMA machines with sparse address topologies do not want all NUMA regions
packed densely inside the MachineState.ram container region. Add a machine
class attribute that skips creating this container region. Individual NUMA
memory device regions are recorded in NodeInfo where the machine init can
add them to the system address space itself.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/hw/boards.h   |  6 ++++++
 include/system/numa.h |  1 +
 hw/core/numa.c        | 44 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 8 deletions(-)