diff mbox series

[05/11] xen: Create per-node outstanding claims

Message ID 20250314172502.53498-6-alejandro.vallejo@cloud.com (mailing list archive)
State New
Headers show
Series Add support for exact-node memory claims | expand

Commit Message

Alejandro Vallejo March 14, 2025, 5:24 p.m. UTC
Extends domain_set_outstanding_pages() to allow staking claims on an
exact node. Also creates global per-node claim counts analogous to
`outstanding_claims`. Note that the per-node counts can't replace the
global one if we want exact-node claims to coexist with non-exact
claims: a non-exact claim is accounted only in the global count, not
in any per-node count.

Signed-off-by: Alejandro Vallejo <alejandro.vallejo@cloud.com>
---
 xen/common/page_alloc.c | 32 +++++++++++++++++++++++++++++++-
 xen/include/xen/sched.h |  3 +++
 2 files changed, 34 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 9243c4f51370..7fe574b29407 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -490,6 +490,7 @@  static unsigned long pernode_avail_pages[MAX_NUMNODES];
 
 static DEFINE_SPINLOCK(heap_lock);
 static long outstanding_claims; /* total outstanding claims by all domains */
+static unsigned long pernode_oc[MAX_NUMNODES]; /* per-node outstanding claims */
 
 unsigned long domain_adjust_tot_pages(struct domain *d, nodeid_t node,
                                       long pages)
@@ -501,20 +502,31 @@  unsigned long domain_adjust_tot_pages(struct domain *d, nodeid_t node,
      * can test d->outstanding_pages race-free because it can only change
      * if d->page_alloc_lock and heap_lock are both held, see also
      * domain_set_outstanding_pages below
+     *
+     * If `d` has an exact-node claim, we must exit early if this is an
+     * adjustment attributed to another node.
      */
-    if ( !d->outstanding_pages || pages <= 0 )
+    if ( !d->outstanding_pages || pages <= 0 ||
+         (d->claim_node != NUMA_NO_NODE && d->claim_node != node) )
         goto out;
 
+
     spin_lock(&heap_lock);
     BUG_ON(outstanding_claims < d->outstanding_pages);
     if ( d->outstanding_pages < pages )
     {
         /* `pages` exceeds the domain's outstanding count. Zero it out. */
+        if ( d->claim_node != NUMA_NO_NODE )
+            pernode_oc[d->claim_node] -= d->outstanding_pages;
+
         outstanding_claims -= d->outstanding_pages;
         d->outstanding_pages = 0;
     }
     else
     {
+        if ( d->claim_node != NUMA_NO_NODE )
+            pernode_oc[d->claim_node] -= pages;
+
         outstanding_claims -= pages;
         d->outstanding_pages -= pages;
     }
@@ -542,6 +554,10 @@  int domain_set_outstanding_pages(struct domain *d, nodeid_t node,
     if ( pages == 0 )
     {
         outstanding_claims -= d->outstanding_pages;
+
+        if ( d->claim_node != NUMA_NO_NODE )
+            pernode_oc[d->claim_node] -= d->outstanding_pages;
+
         d->outstanding_pages = 0;
         ret = 0;
         goto out;
@@ -564,12 +580,26 @@  int domain_set_outstanding_pages(struct domain *d, nodeid_t node,
     /* how much memory is available? */
     avail_pages = total_avail_pages - outstanding_claims;
 
+    /* This check can't be skipped for the NUMA case, or we may overclaim */
     if ( pages > avail_pages )
         goto out;
 
+    if ( node != NUMA_NO_NODE )
+    {
+        avail_pages = pernode_avail_pages[node] - pernode_oc[node];
+
+        if ( pages > avail_pages )
+            goto out;
+    }
+
     /* yay, claim fits in available memory, stake the claim, success! */
     d->outstanding_pages = pages;
     outstanding_claims += d->outstanding_pages;
+    d->claim_node = node;
+
+    if ( node != NUMA_NO_NODE )
+        pernode_oc[node] += pages;
+
     ret = 0;
 
 out:
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 559d201e0c7e..307a9d749f5d 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -406,6 +406,9 @@  struct domain
     unsigned int     max_pages;         /* maximum value for domain_tot_pages() */
     unsigned int     extra_pages;       /* pages not included in domain_tot_pages() */
 
+    /* NUMA node from which outstanding pages have been reserved */
+    unsigned int     claim_node;
+
 #ifdef CONFIG_MEM_SHARING
     atomic_t         shr_pages;         /* shared pages */
 #endif