diff mbox series

[v8,3/4] mm: make pages allocated with MEMF_no_refcount safe to assign

Message ID 20200130145745.1306-4-pdurrant@amazon.com (mailing list archive)
State Superseded
Headers show
Series purge free_shared_domheap_page() | expand

Commit Message

Paul Durrant Jan. 30, 2020, 2:57 p.m. UTC
Currently it is unsafe to assign a domheap page allocated with
MEMF_no_refcount to a domain because the domain't 'tot_pages' will not
be incremented, but will be decrement when the page is freed (since
free_domheap_pages() has no way of telling that the increment was skipped).

This patch allocates a new 'count_info' bit for a PGC_extra flag
which is then used to mark pages when alloc_domheap_pages() is called
with MEMF_no_refcount. assign_pages() because it still needs to call
domain_adjust_tot_pages() to make sure the domain is appropriately
referenced. Hence it is modified to do that for PGC_extra pages even if it
is passed MEMF_no_refount.

The number of PGC_extra pages assigned to a domain is tracked in a new
'extra_pages' counter, which is then subtracted from 'total_pages' in
the domain_tot_pages() helper. Thus 'normal' page assignments will still
be appropriately checked against 'max_pages'.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Julien Grall <julien@xen.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Wei Liu <wl@xen.org>
Cc: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v8:
 - Drop the idea of post-allocation assignment adding an error path to
   steal_page() if it encounters a PGC_extra page
 - Tighten up the ASSERTs in assign_pages()

v7:
 - s/PGC_no_refcount/PGC_extra/g
 - Re-work allocation to account for 'extra' pages, also making it
   safe to assign PGC_extra pages post-allocation

v6:
 - Add an extra ASSERT into assign_pages() that PGC_no_refcount is not
   set if MEMF_no_refcount is clear
 - ASSERT that count_info is 0 in alloc_domheap_pages() and set to
   PGC_no_refcount rather than ORing

v5:
 - Make sure PGC_no_refcount is set before assign_pages() is called
 - Don't bother to clear PGC_no_refcount in free_domheap_pages() and
   drop ASSERT in free_heap_pages()
 - Don't latch count_info in free_heap_pages()

v4:
 - New in v4
---
 xen/arch/x86/mm.c        |  3 +-
 xen/common/page_alloc.c  | 63 +++++++++++++++++++++++++++++++---------
 xen/include/asm-arm/mm.h |  5 +++-
 xen/include/asm-x86/mm.h |  7 +++--
 xen/include/xen/sched.h  |  5 +++-
 5 files changed, 64 insertions(+), 19 deletions(-)

Comments

Jan Beulich Jan. 30, 2020, 4:27 p.m. UTC | #1
On 30.01.2020 15:57, Paul Durrant wrote:
> Currently it is unsafe to assign a domheap page allocated with
> MEMF_no_refcount to a domain because the domain't 'tot_pages' will not
> be incremented, but will be decrement when the page is freed (since
> free_domheap_pages() has no way of telling that the increment was skipped).
> 
> This patch allocates a new 'count_info' bit for a PGC_extra flag
> which is then used to mark pages when alloc_domheap_pages() is called
> with MEMF_no_refcount. assign_pages() because it still needs to call
> domain_adjust_tot_pages() to make sure the domain is appropriately
> referenced. Hence it is modified to do that for PGC_extra pages even if it
> is passed MEMF_no_refount.
> 
> The number of PGC_extra pages assigned to a domain is tracked in a new
> 'extra_pages' counter, which is then subtracted from 'total_pages' in
> the domain_tot_pages() helper. Thus 'normal' page assignments will still
> be appropriately checked against 'max_pages'.
> 
> Signed-off-by: Paul Durrant <pdurrant@amazon.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
diff mbox series

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 8bb66cf30c..2796161c1f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -4217,7 +4217,8 @@  int steal_page(
     if ( !(owner = page_get_owner_and_reference(page)) )
         goto fail;
 
-    if ( owner != d || is_xen_heap_page(page) )
+    if ( owner != d || is_xen_heap_page(page) ||
+         (page->count_info & PGC_extra) )
         goto fail_put;
 
     /*
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index bbd3163909..1ac9d9c719 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -2267,7 +2267,29 @@  int assign_pages(
         goto out;
     }
 
-    if ( !(memflags & MEMF_no_refcount) )
+#ifndef NDEBUG
+    {
+        unsigned int extra_pages = 0;
+
+        for ( i = 0; i < (1ul << order); i++ )
+        {
+            ASSERT(!(pg[i].count_info & ~PGC_extra));
+            if ( pg[i].count_info & PGC_extra )
+                extra_pages++;
+        }
+
+        ASSERT(!extra_pages ||
+               ((memflags & MEMF_no_refcount) &&
+                extra_pages == 1u << order));
+    }
+#endif
+
+    if ( pg[0].count_info & PGC_extra )
+    {
+        d->extra_pages += 1u << order;
+        memflags &= ~MEMF_no_refcount;
+    }
+    else if ( !(memflags & MEMF_no_refcount) )
     {
         unsigned int tot_pages = domain_tot_pages(d) + (1 << order);
 
@@ -2278,18 +2300,19 @@  int assign_pages(
             rc = -E2BIG;
             goto out;
         }
+    }
 
-        if ( unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) )
+    if ( !(memflags & MEMF_no_refcount) &&
+         unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) )
             get_knownalive_domain(d);
-    }
 
     for ( i = 0; i < (1 << order); i++ )
     {
         ASSERT(page_get_owner(&pg[i]) == NULL);
-        ASSERT(!pg[i].count_info);
         page_set_owner(&pg[i], d);
         smp_wmb(); /* Domain pointer must be visible before updating refcnt. */
-        pg[i].count_info = PGC_allocated | 1;
+        pg[i].count_info =
+            (pg[i].count_info & PGC_extra) | PGC_allocated | 1;
         page_list_add_tail(&pg[i], &d->page_list);
     }
 
@@ -2315,11 +2338,6 @@  struct page_info *alloc_domheap_pages(
 
     if ( memflags & MEMF_no_owner )
         memflags |= MEMF_no_refcount;
-    else if ( (memflags & MEMF_no_refcount) && d )
-    {
-        ASSERT(!(memflags & MEMF_no_refcount));
-        return NULL;
-    }
 
     if ( !dma_bitsize )
         memflags &= ~MEMF_no_dma;
@@ -2332,11 +2350,23 @@  struct page_info *alloc_domheap_pages(
                                   memflags, d)) == NULL)) )
          return NULL;
 
-    if ( d && !(memflags & MEMF_no_owner) &&
-         assign_pages(d, pg, order, memflags) )
+    if ( d && !(memflags & MEMF_no_owner) )
     {
-        free_heap_pages(pg, order, memflags & MEMF_no_scrub);
-        return NULL;
+        if ( memflags & MEMF_no_refcount )
+        {
+            unsigned long i;
+
+            for ( i = 0; i < (1ul << order); i++ )
+            {
+                ASSERT(!pg[i].count_info);
+                pg[i].count_info = PGC_extra;
+            }
+        }
+        if ( assign_pages(d, pg, order, memflags) )
+        {
+            free_heap_pages(pg, order, memflags & MEMF_no_scrub);
+            return NULL;
+        }
     }
 
     return pg;
@@ -2384,6 +2414,11 @@  void free_domheap_pages(struct page_info *pg, unsigned int order)
                     BUG();
                 }
                 arch_free_heap_page(d, &pg[i]);
+                if ( pg[i].count_info & PGC_extra )
+                {
+                    ASSERT(d->extra_pages);
+                    d->extra_pages--;
+                }
             }
 
             drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order));
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 333efd3a60..7df91280bc 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -119,9 +119,12 @@  struct page_info
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_extra        PG_shift(10)
+#define PGC_extra         PG_mask(1, 10)
 
 /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2ca8882ad0..06d64d494d 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -77,9 +77,12 @@ 
 #define PGC_state_offlined PG_mask(2, 9)
 #define PGC_state_free    PG_mask(3, 9)
 #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
+/* Page is not reference counted */
+#define _PGC_extra        PG_shift(10)
+#define PGC_extra         PG_mask(1, 10)
 
- /* Count of references to this frame. */
-#define PGC_count_width   PG_shift(9)
+/* Count of references to this frame. */
+#define PGC_count_width   PG_shift(10)
 #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
 
 /*
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1b6d7b941f..21b5f4cebd 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -374,6 +374,7 @@  struct domain
     unsigned int     xenheap_pages;     /* pages allocated from Xen heap */
     unsigned int     outstanding_pages; /* pages claimed but not possessed */
     unsigned int     max_pages;         /* maximum value for domain_tot_pages() */
+    unsigned int     extra_pages;       /* pages not included in domain_tot_pages() */
     atomic_t         shr_pages;         /* shared pages */
     atomic_t         paged_pages;       /* paged-out pages */
 
@@ -548,7 +549,9 @@  struct domain
 /* Return number of pages currently posessed by the domain */
 static inline unsigned int domain_tot_pages(const struct domain *d)
 {
-    return d->tot_pages;
+    ASSERT(d->extra_pages <= d->tot_pages);
+
+    return d->tot_pages - d->extra_pages;
 }
 
 /* Protect updates/reads (resp.) of domain_list and domain_hash. */