Message ID | 20241022101029.967911-1-bernhardkaindl7@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [1/1] NUMA: Introduce NODE_DATA->node_present_pages(RAM pages) | expand |
Hi, The subject was probably meant to have a v3? On Tue Oct 22, 2024 at 11:10 AM BST, Bernhard Kaindl wrote: > From: Bernhard Kaindl <bernhard.kaindl@cloud.com> > > Some admin tools like 'xl info -n' like to display the total memory > for each NUMA node. The Xen backend[1] of hwloc comes to mind too. > > The total amount of RAM on a NUMA node is not needed by Xen internally: > Xen only uses NODE_DATA->node_spanned_pages, but that can be confusing > for users as it includes memory holes (can be as large as 2GB on x86). > > Calculate the RAM per NUMA node by iterating over arch_get_ram_range() > which returns the e820 RAM entries on x86 and update it on memory_add(). > > Use NODE_DATA->node_present_pages (like in the Linux kernel) to hold > this info and in a later commit, find a way for tools to read it. Part of this information would be more helpful in a comment in the definition of node_data, I think. > > [1] hwloc with Xen backend: https://github.com/xenserver-next/hwloc/ > > Signed-off-by: Bernhard Kaindl <bernhard.kaindl@cloud.com> > --- > Changes in v2: > - Remove update of numainfo call, only calculate RAM for each node. > - Calculate RAM based on page boundaries, coding style fixes > Changes in v3: > - Use PFN_UP/DOWN, refactored further to simplify the code, while leaving > compiler-level optimisations to the compiler's optimisation passes. 
> --- > xen/arch/x86/x86_64/mm.c | 3 +++ > xen/common/numa.c | 31 ++++++++++++++++++++++++++++--- > xen/include/xen/numa.h | 3 +++ > 3 files changed, 34 insertions(+), 3 deletions(-) > > diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c > index b2a280fba3..66b9bed057 100644 > --- a/xen/arch/x86/x86_64/mm.c > +++ b/xen/arch/x86/x86_64/mm.c > @@ -1334,6 +1334,9 @@ int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm) > share_hotadd_m2p_table(&info); > transfer_pages_to_heap(&info); > > + /* Update the node's present pages (like the total_pages of the system) */ > + NODE_DATA(node)->node_present_pages += epfn - spfn; > + > return 0; > > destroy_m2p: > diff --git a/xen/common/numa.c b/xen/common/numa.c > index 28a09766fa..374132df08 100644 > --- a/xen/common/numa.c > +++ b/xen/common/numa.c > @@ -4,6 +4,7 @@ > * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com> > */ > > +#include "xen/pfn.h" > #include <xen/init.h> > #include <xen/keyhandler.h> > #include <xen/mm.h> > @@ -499,15 +500,39 @@ int __init compute_hash_shift(const struct node *nodes, > return shift; > } > > -/* Initialize NODE_DATA given nodeid and start/end */ > +/** > + * @brief Initialize a NUMA node's NODE_DATA given nodeid and start/end addrs. > + * > + * This function sets up the boot memory for a given NUMA node by calculating > + * the node's start and end page frame numbers (PFNs) and determining > + * the number of present RAM pages within the node's memory range. > + * > + * @param nodeid The identifier of the node to initialize. > + * @param start The starting physical address of the node's memory range. > + * @param end The ending physical address of the node's memory range. I'd add that end is "exclusive". To make it unambiguous. 
> + */ > void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end) > { > unsigned long start_pfn = paddr_to_pfn(start); > unsigned long end_pfn = paddr_to_pfn(end); > + struct node_data *numa_node = NODE_DATA(nodeid); > + paddr_t start_ram, end_ram; > + unsigned long pages = 0; > + unsigned int idx = 0; > + int err; > > - NODE_DATA(nodeid)->node_start_pfn = start_pfn; > - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; > + numa_node->node_start_pfn = start_pfn; > + numa_node->node_spanned_pages = end_pfn - start_pfn; > > + /* Calculate the number of present RAM pages within the node: */ > + while ( (err = arch_get_ram_range(idx++, &start_ram, &end_ram)) != -ENOENT ) nit: This line seems quite overloaded. Might be easier for the eye as a do-while, with "int err" being defined inside the loop itself. > + { > + if ( err || start_ram >= end || end_ram <= start ) > + continue; /* Not RAM (err != 0) or range is outside the node */ > + > + pages += PFN_DOWN(min(end_ram, end)) - PFN_UP(max(start_ram, start)); > + } > + numa_node->node_present_pages = pages; > node_set_online(nodeid); > } > > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h > index fd1511a6fb..c860f3ad1c 100644 > --- a/xen/include/xen/numa.h > +++ b/xen/include/xen/numa.h > @@ -71,6 +71,7 @@ extern nodeid_t *memnodemap; > struct node_data { > unsigned long node_start_pfn; > unsigned long node_spanned_pages; > + unsigned long node_present_pages; > }; > > extern struct node_data node_data[]; > @@ -91,6 +92,7 @@ static inline nodeid_t mfn_to_nid(mfn_t mfn) > > #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) > #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) > +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) > #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ > NODE_DATA(nid)->node_spanned_pages) > > @@ -123,6 +125,7 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node); > extern mfn_t 
first_valid_mfn; > > #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) > +#define node_present_pages(nid) total_pages > #define node_start_pfn(nid) mfn_x(first_valid_mfn) > #define __node_distance(a, b) 20 >
Cheers, Alejandro
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index b2a280fba3..66b9bed057 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -1334,6 +1334,9 @@ int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm) share_hotadd_m2p_table(&info); transfer_pages_to_heap(&info); + /* Update the node's present pages (like the total_pages of the system) */ + NODE_DATA(node)->node_present_pages += epfn - spfn; + return 0; destroy_m2p: diff --git a/xen/common/numa.c b/xen/common/numa.c index 28a09766fa..374132df08 100644 --- a/xen/common/numa.c +++ b/xen/common/numa.c @@ -4,6 +4,7 @@ * Adapted for Xen: Ryan Harper <ryanh@us.ibm.com> */ +#include "xen/pfn.h" #include <xen/init.h> #include <xen/keyhandler.h> #include <xen/mm.h> @@ -499,15 +500,39 @@ int __init compute_hash_shift(const struct node *nodes, return shift; } -/* Initialize NODE_DATA given nodeid and start/end */ +/** + * @brief Initialize a NUMA node's NODE_DATA given nodeid and start/end addrs. + * + * This function sets up the boot memory for a given NUMA node by calculating + * the node's start and end page frame numbers (PFNs) and determining + * the number of present RAM pages within the node's memory range. + * + * @param nodeid The identifier of the node to initialize. + * @param start The starting physical address of the node's memory range. + * @param end The ending physical address of the node's memory range. 
+ */ void __init setup_node_bootmem(nodeid_t nodeid, paddr_t start, paddr_t end) { unsigned long start_pfn = paddr_to_pfn(start); unsigned long end_pfn = paddr_to_pfn(end); + struct node_data *numa_node = NODE_DATA(nodeid); + paddr_t start_ram, end_ram; + unsigned long pages = 0; + unsigned int idx = 0; + int err; - NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; + numa_node->node_start_pfn = start_pfn; + numa_node->node_spanned_pages = end_pfn - start_pfn; + /* Calculate the number of present RAM pages within the node: */ + while ( (err = arch_get_ram_range(idx++, &start_ram, &end_ram)) != -ENOENT ) + { + if ( err || start_ram >= end || end_ram <= start ) + continue; /* Not RAM (err != 0) or range is outside the node */ + + pages += PFN_DOWN(min(end_ram, end)) - PFN_UP(max(start_ram, start)); + } + numa_node->node_present_pages = pages; node_set_online(nodeid); } diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h index fd1511a6fb..c860f3ad1c 100644 --- a/xen/include/xen/numa.h +++ b/xen/include/xen/numa.h @@ -71,6 +71,7 @@ extern nodeid_t *memnodemap; struct node_data { unsigned long node_start_pfn; unsigned long node_spanned_pages; + unsigned long node_present_pages; }; extern struct node_data node_data[]; @@ -91,6 +92,7 @@ static inline nodeid_t mfn_to_nid(mfn_t mfn) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) @@ -123,6 +125,7 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node); extern mfn_t first_valid_mfn; #define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn)) +#define node_present_pages(nid) total_pages #define node_start_pfn(nid) mfn_x(first_valid_mfn) #define __node_distance(a, b) 20