diff mbox series

[3/3] ppc/pnv: Enable sparse chip RAM memory addresses

Message ID 20250303100732.576457-4-npiggin@gmail.com (mailing list archive)
State New
Headers show
Series ppc/pnv: Support sparse NUMA memory addresses | expand

Commit Message

Nicholas Piggin March 3, 2025, 10:07 a.m. UTC
Power CPUs place RAM memory regions for each chip (NUMA node) at
fixed locations in the real address space, resulting in a sparse
(disjoint) RAM address layout.

Use the new NUMA machine class attribute numa_skip_ram_container to
allow pnv machine init to lay out NUMA node memory regions into the
system address space in the proper location rather than packing them
densely from address 0.

With the following options:

  -smp 2,sockets=2 -m 4g
  -object memory-backend-ram,size=2G,id=mem0
  -object memory-backend-ram,size=2G,id=mem1
  -numa node,nodeid=0,memdev=mem0,cpus=0
  -numa node,nodeid=1,memdev=mem1,cpus=1

Linux (PowerNV) now boots with:

  node   0: [mem 0x0000000000000000-0x000000007fffffff]
  node   1: [mem 0x0000100000000000-0x000010007fffffff]

Prior to this change:

  node   0: [mem 0x0000000000000000-0x000000007fffffff]
  node   1: [mem 0x0000000080000000-0x00000000ffffffff]

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/pnv.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5f2041f7f9d..b6308593335 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -966,7 +966,24 @@  static void pnv_init(MachineState *machine)
         exit(EXIT_FAILURE);
     }
 
-    memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+    if (machine->ram) {
+        memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+    } else if (machine->numa_state) {
+        for (i = 0; i < machine->numa_state->num_nodes; i++) {
+            MemoryRegion *mr = machine->numa_state->nodes[i].node_mr;
+
+            /*
+             * powernv uses numa_mem_align_shift to derive the base RAM
+             * address of each chip: addr = chip number << shift.
+             */
+            chip_ram_start = (uint64_t)i << mc->numa_mem_align_shift;
+            if (!mr) {
+                continue;
+            }
+            memory_region_add_subregion(get_system_memory(), chip_ram_start,
+                                        mr);
+        }
+    }
 
     /*
      * Create our simple PNOR device
@@ -1100,20 +1117,30 @@  static void pnv_init(MachineState *machine)
         exit(1);
     }
 
+    chip_ram_start = 0;
     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
     for (i = 0; i < pnv->num_chips; i++) {
         char chip_name[32];
         Object *chip = OBJECT(qdev_new(chip_typename));
-        uint64_t chip_ram_size =  pnv_chip_get_ram_size(pnv, i);
+        uint64_t chip_ram_size;
 
         pnv->chips[i] = PNV_CHIP(chip);
 
+        if (machine->numa_state) {
+            chip_ram_start = (uint64_t)i << mc->numa_mem_align_shift;
+            chip_ram_size = machine->numa_state->nodes[i].node_mem;
+        } else {
+            chip_ram_size =  pnv_chip_get_ram_size(pnv, i);
+        }
+
         /* Distribute RAM among the chips  */
         object_property_set_int(chip, "ram-start", chip_ram_start,
                                 &error_fatal);
         object_property_set_int(chip, "ram-size", chip_ram_size,
                                 &error_fatal);
-        chip_ram_start += chip_ram_size;
+        if (!machine->numa_state) {
+            chip_ram_start += chip_ram_size;
+        }
 
         snprintf(chip_name, sizeof(chip_name), "chip[%d]", i);
         object_property_add_child(OBJECT(pnv), chip_name, chip);
@@ -2680,6 +2707,7 @@  static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "IBM PowerNV (Non-Virtualized) POWER8";
     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+    mc->numa_mem_align_shift = 42;
     compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
 
     xic->icp_get = pnv_icp_get;
@@ -2709,6 +2737,7 @@  static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "IBM PowerNV (Non-Virtualized) POWER9";
     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.2");
+    mc->numa_mem_align_shift = 42;
     compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
 
     xfc->match_nvt = pnv_match_nvt;
@@ -2747,6 +2776,7 @@  static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
     };
 
     mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
+    mc->numa_mem_align_shift = 44;
     compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
 
     mc->alias = "powernv";
@@ -2951,6 +2981,7 @@  static void pnv_machine_class_init(ObjectClass *oc, void *data)
 
     mc->numa_mem_supported = true;
     mc->auto_enable_numa = true;
+    mc->numa_skip_ram_container = true;
 
     mc->cpu_index_to_instance_props = pnv_cpu_index_to_props;
     mc->get_default_cpu_node_id = pnv_get_default_cpu_node_id;