[v2,2/2] x86/mem_sharing: replace using page_lock with our own lock

Message ID 20190412194712.1602-2-tamas@tklengyel.com (mailing list archive)
State Superseded
Series [v2,1/2] x86/mem_sharing: reorder when pages are unlocked and released

Commit Message

Tamas K Lengyel April 12, 2019, 7:47 p.m. UTC
The page_lock system was not intended to be used outside the PV pagetable
code. Replace its use in mem_sharing with an internal rwlock.
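
In outline (names and layout are taken from the patch below, condensed here
for readability): the rwlock is embedded in struct page_sharing_info and the
lock helper is no longer fallible, so callers no longer handle lock failure.
A condensed sketch of the shape of the change, omitting the per-CPU
mm-lock-level bookkeeping:

struct page_sharing_info
{
    rwlock_t lock;          /* Serializes updates to this page's sharing state. */
    struct page_info *pg;   /* Back pointer to the page. */
    shr_handle_t handle;    /* Globally unique version / handle. */
};

static inline void mem_sharing_page_lock(struct page_info *pg)
{
    /* Unconditional: assumes pg->sharing is set up and stays allocated. */
    write_lock(&pg->sharing->lock);
}

static inline void mem_sharing_page_unlock(struct page_info *pg)
{
    if ( pg->sharing )
        write_unlock(&pg->sharing->lock);
}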

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
---
 xen/arch/x86/mm/mem_sharing.c     | 88 ++++++++++++-------------------
 xen/include/asm-x86/mem_sharing.h |  1 +
 2 files changed, 34 insertions(+), 55 deletions(-)

Comments

Tamas K Lengyel April 15, 2019, 4:53 a.m. UTC | #1
On Fri, Apr 12, 2019 at 1:50 PM Tamas K Lengyel <tamas@tklengyel.com> wrote:
>
> The page_lock system was not intended to be used outside the PV pagetable
> code. Replace its use in mem_sharing with an internal rwlock.
>

After further testing I'm running into a lot of strange problems with this
patch when domains with shared memory get destroyed, oddly enough after
mem_sharing has finished its own cleanup: (XEN)
[<ffff82d0802729b9>] domain.c#relinquish_memory+0x62/0x4f8. I tried
moving the lock into page_info instead (in a new struct together with
sharing_info), but that approach needs a lot more effort and a lot more
thought, since page_lock seems to have some other side effect that makes
all of this work. Unfortunately I can't afford to chase this path further
right now, so I'll go with the band-aid solution of making
page_lock/page_unlock mem_sharing friendly (a sketch of the suspected
problem window follows below).

Tamas
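
For context on the destruction-time crashes above, here is one plausible
window, assuming teardown can free pg->sharing while another vCPU still
holds or is acquiring the embedded lock. This is a hypothesis consistent
with the "if ( pg->sharing )" guard in the unlock path, not a confirmed
diagnosis:

/* Hypothetical interleaving; vCPU names are illustrative.
 *
 *   vCPU A                                vCPU B
 *   mem_sharing_page_lock(pg)
 *     write_lock(&pg->sharing->lock)
 *                                         teardown:
 *                                           xfree(pg->sharing);
 *                                           pg->sharing = NULL;
 *   mem_sharing_page_unlock(pg)
 *     pg->sharing is now NULL, so the
 *     freed lock is never write-unlocked.
 */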
Patch

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 708037d91e..0afbff1d89 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -50,7 +50,7 @@  typedef struct pg_lock_data {
 static DEFINE_PER_CPU(pg_lock_data_t, __pld);
 
 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
-    debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
+    gdprintk(XENLOG_INFO, _f, ##_a)
 
 /* Reverse map defines */
 #define RMAP_HASHTAB_ORDER  0
@@ -112,30 +112,21 @@  static inline void page_sharing_dispose(struct page_info *page)
 
 #endif /* MEM_SHARING_AUDIT */
 
-static inline int mem_sharing_page_lock(struct page_info *pg)
+static inline void mem_sharing_page_lock(struct page_info *pg)
 {
-    int rc;
     pg_lock_data_t *pld = &(this_cpu(__pld));
-
     page_sharing_mm_pre_lock();
-    rc = page_lock(pg);
-    if ( rc )
-    {
-        preempt_disable();
-        page_sharing_mm_post_lock(&pld->mm_unlock_level, 
-                                  &pld->recurse_count);
-    }
-    return rc;
+    write_lock(&pg->sharing->lock);
+    page_sharing_mm_post_lock(&pld->mm_unlock_level, &pld->recurse_count);
 }
 
 static inline void mem_sharing_page_unlock(struct page_info *pg)
 {
     pg_lock_data_t *pld = &(this_cpu(__pld));
+    page_sharing_mm_unlock(pld->mm_unlock_level, &pld->recurse_count);
 
-    page_sharing_mm_unlock(pld->mm_unlock_level, 
-                           &pld->recurse_count);
-    preempt_enable();
-    page_unlock(pg);
+    if ( pg->sharing )
+        write_unlock(&pg->sharing->lock);
 }
 
 static inline shr_handle_t get_next_handle(void)
@@ -398,12 +389,8 @@  static struct page_info* mem_sharing_lookup(unsigned long mfn)
         struct page_info* page = mfn_to_page(_mfn(mfn));
         if ( page_get_owner(page) == dom_cow )
         {
-            /* Count has to be at least two, because we're called
-             * with the mfn locked (1) and this is supposed to be 
-             * a shared page (1). */
             unsigned long t = read_atomic(&page->u.inuse.type_info);
             ASSERT((t & PGT_type_mask) == PGT_shared_page);
-            ASSERT((t & PGT_count_mask) >= 2);
             ASSERT(SHARED_M2P(get_gpfn_from_mfn(mfn)));
             return page;
         }
@@ -437,14 +424,7 @@  static int audit(void)
         pg = pg_shared_info->pg;
         mfn = page_to_mfn(pg);
 
-        /* If we can't lock it, it's definitely not a shared page */
-        if ( !mem_sharing_page_lock(pg) )
-        {
-           MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
-                              mfn_x(mfn), pg->u.inuse.type_info);
-           errors++;
-           continue;
-        }
+        mem_sharing_page_lock(pg);
 
         /* Check if the MFN has correct type, owner and handle. */ 
         if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
@@ -517,12 +497,12 @@  static int audit(void)
             put_domain(d);
             nr_gfns++;
         }
-        /* The type count has an extra ref because we have locked the page */
-        if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
+
+        if ( nr_gfns != (pg->u.inuse.type_info & PGT_count_mask) )
         {
             MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
                               "nr_gfns in list %lu, in type_info %lx\n",
-                              mfn_x(mfn), nr_gfns, 
+                              mfn_x(mfn), nr_gfns,
                               (pg->u.inuse.type_info & PGT_count_mask));
             errors++;
         }
@@ -626,7 +606,6 @@  static int page_make_sharable(struct domain *d,
     drop_dom_ref = !domain_adjust_tot_pages(d, -1);
     page_list_del(page, &d->page_list);
     spin_unlock(&d->page_alloc_lock);
-
     if ( drop_dom_ref )
         put_domain(d);
     return 0;
@@ -652,7 +631,7 @@  static int page_make_private(struct domain *d, struct page_info *page)
     /* Because we are locking pages individually, we need to drop
      * the lock here, while the page is typed. We cannot risk the 
      * race of page_unlock and then put_page_type. */
-    expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
+    expected_type = (PGT_shared_page | PGT_validated | 2);
     if ( page->u.inuse.type_info != expected_type )
     {
         spin_unlock(&d->page_alloc_lock);
@@ -688,10 +667,7 @@  static inline struct page_info *__grab_shared_page(mfn_t mfn)
         return NULL;
     pg = mfn_to_page(mfn);
 
-    /* If the page is not validated we can't lock it, and if it's  
-     * not validated it's obviously not shared. */
-    if ( !mem_sharing_page_lock(pg) )
-        return NULL;
+    mem_sharing_page_lock(pg);
 
     if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
     {
@@ -770,6 +746,7 @@  static int nominate_page(struct domain *d, gfn_t gfn,
     p2m_type_t p2mt;
     p2m_access_t p2ma;
     mfn_t mfn;
+    struct page_sharing_info *sharing = NULL;
     struct page_info *page = NULL; /* gcc... */
     int ret;
 
@@ -833,38 +810,39 @@  static int nominate_page(struct domain *d, gfn_t gfn,
     }
 #endif
 
-    /* Try to convert the mfn to the sharable type */
-    page = mfn_to_page(mfn);
-    ret = page_make_sharable(d, page, expected_refcnt); 
-    if ( ret ) 
-        goto out;
+    /* We hold the gfn lock and this domain has the only reference to this page
+     * so we don't need any other locking, no other domain can use this page
+     * for sharing until we release the gfn lock */
 
-    /* Now that the page is validated, we can lock it. There is no 
-     * race because we're holding the p2m entry, so no one else 
-     * could be nominating this gfn */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
+    page = mfn_to_page(mfn);
+    /* Try to convert the mfn to the sharable type */
+    ret = page_make_sharable(d, page, expected_refcnt);
+    if ( ret )
         goto out;
 
     /* Initialize the shared state */
     ret = -ENOMEM;
-    if ( (page->sharing = 
-            xmalloc(struct page_sharing_info)) == NULL )
+    sharing = xmalloc(struct page_sharing_info);
+    if ( !sharing )
     {
-        /* Making a page private atomically unlocks it */
-        BUG_ON(page_make_private(d, page) != 0);
+        BUG_ON(page_make_private(d, page) != 0);
         goto out;
     }
-    page->sharing->pg = page;
+
+    rwlock_init(&sharing->lock);
+
+    sharing->pg = page;
+    page->sharing = sharing;
+
     rmap_init(page);
 
     /* Create the handle */
-    page->sharing->handle = get_next_handle();  
+    page->sharing->handle = get_next_handle();
 
     /* Create the local gfn info */
     if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
     {
-        xfree(page->sharing);
+        xfree(sharing);
         page->sharing = NULL;
         BUG_ON(page_make_private(d, page) != 0);
         goto out;
@@ -881,7 +859,7 @@  static int nominate_page(struct domain *d, gfn_t gfn,
 
     *phandle = page->sharing->handle;
     audit_add_list(page);
-    mem_sharing_page_unlock(page);
+
     ret = 0;
 
 out:
diff --git a/xen/include/asm-x86/mem_sharing.h b/xen/include/asm-x86/mem_sharing.h
index 0e77b7d935..03cf651337 100644
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -38,6 +38,7 @@  typedef struct rmap_hashtab {
 
 struct page_sharing_info
 {
+    rwlock_t lock;
     struct page_info *pg;   /* Back pointer to the page. */
     shr_handle_t handle;    /* Globally unique version / handle. */
 #if MEM_SHARING_AUDIT
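Worth noting: the patch only ever takes the write side of the new rwlock.
If the rwlock stays, read-mostly paths could take the read side instead;
a hypothetical reader, as a sketch (this helper is illustrative and not
part of the patch):

/* Illustrative only: fetch the handle under the read lock.
 * Assumes the caller guarantees pg->sharing is non-NULL and live. */
static inline shr_handle_t sharing_read_handle(struct page_info *pg)
{
    shr_handle_t h;

    read_lock(&pg->sharing->lock);
    h = pg->sharing->handle;
    read_unlock(&pg->sharing->lock);

    return h;
}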