diff mbox series

[2/3] hw/core/numa: add attribute to skip creation of MachineState.ram region

Message ID 20250303100732.576457-3-npiggin@gmail.com (mailing list archive)
State New
Headers show
Series ppc/pnv: Support sparse NUMA memory addresses | expand

Commit Message

Nicholas Piggin March 3, 2025, 10:07 a.m. UTC
NUMA machines with sparse address topologies do not want all NUMA
regions packed densely inside the MachineState.ram container region.
Add a machine class attribute that skips creating this container
region. Individual NUMA memory device regions are recorded in NodeInfo
where the machine init can add them to the system address space itself.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/hw/boards.h   |  6 ++++++
 include/system/numa.h |  1 +
 hw/core/numa.c        | 44 +++++++++++++++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 9360d1ce394..9e6654ee9ca 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -233,6 +233,11 @@  typedef struct {
  *    is not needed.
  * @numa_mem_supported:
  *    true if '--numa node.mem' option is supported and false otherwise
+ * @numa_skip_ram_container:
+ *    If false, numa memory init creates the MachineState.ram memory region
+ *    with all numa node regions packed densely within it. If true, the .ram
+ *    region is not created. Machines can use this e.g., to place NUMA
+ *    regions sparsely within the address space.
  * @hotplug_allowed:
  *    If the hook is provided, then it'll be called for each device
  *    hotplug to check whether the device hotplug is allowed.  Return
@@ -311,6 +316,7 @@  struct MachineClass {
     bool nvdimm_supported;
     bool numa_mem_supported;
     bool auto_enable_numa;
+    bool numa_skip_ram_container;
     bool cpu_cluster_has_numa_boundary;
     SMPCompatProps smp_props;
     const char *default_ram_id;
diff --git a/include/system/numa.h b/include/system/numa.h
index 1044b0eb6e9..001e872d33e 100644
--- a/include/system/numa.h
+++ b/include/system/numa.h
@@ -38,6 +38,7 @@  enum {
 typedef struct NodeInfo {
     uint64_t node_mem;
     struct HostMemoryBackend *node_memdev;
+    MemoryRegion *node_mr;
     bool present;
     bool has_cpu;
     bool has_gi;
diff --git a/hw/core/numa.c b/hw/core/numa.c
index 218576f7455..d84b2d70849 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -623,19 +623,46 @@  static void complete_init_numa_distance(MachineState *ms)
     }
 }
 
-static void numa_init_memdev_container(MachineState *ms, MemoryRegion *ram)
+/*
+ * Consume all NUMA memory backends and store the regions in NodeInfo.node_mr.
+ */
+static void numa_init_memdev(MachineState *ms)
 {
     int i;
-    uint64_t addr = 0;
 
     for (i = 0; i < ms->numa_state->num_nodes; i++) {
-        uint64_t size = ms->numa_state->nodes[i].node_mem;
         HostMemoryBackend *backend = ms->numa_state->nodes[i].node_memdev;
         if (!backend) {
             continue;
         }
         MemoryRegion *seg = machine_consume_memdev(ms, backend);
-        memory_region_add_subregion(ram, addr, seg);
+        ms->numa_state->nodes[i].node_mr = seg;
+    }
+}
+
+/*
+ * Consume all NUMA memory backends as with numa_init_memdev, packing them
+ * densely into a MachineState.ram "container" region.
+ */
+static void numa_init_memdev_container(MachineState *ms)
+{
+    int i;
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    uint64_t addr = 0;
+
+    ms->ram = g_new(MemoryRegion, 1);
+    memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
+                       ms->ram_size);
+
+    numa_init_memdev(ms);
+
+    for (i = 0; i < ms->numa_state->num_nodes; i++) {
+        uint64_t size = ms->numa_state->nodes[i].node_mem;
+        MemoryRegion *seg = ms->numa_state->nodes[i].node_mr;
+        if (!seg) {
+            continue;
+        }
+        memory_region_add_subregion(ms->ram, addr, seg);
         addr += size;
     }
 }
@@ -706,10 +733,11 @@  void numa_complete_configuration(MachineState *ms)
                              " properties are mutually exclusive");
                 exit(1);
             }
-            ms->ram = g_new(MemoryRegion, 1);
-            memory_region_init(ms->ram, OBJECT(ms), mc->default_ram_id,
-                               ms->ram_size);
-            numa_init_memdev_container(ms, ms->ram);
+            if (mc->numa_skip_ram_container) {
+                numa_init_memdev(ms);
+            } else {
+                numa_init_memdev_container(ms);
+            }
         }
         /* QEMU needs at least all unique node pair distances to build
          * the whole NUMA distance table. QEMU treats the distance table