
[RFC,XEN,v3,26/39] xen/pmem: add function to map PMEM pages to HVM domain

Message ID 20170911043820.14617-27-haozhong.zhang@intel.com

Commit Message

Haozhong Zhang Sept. 11, 2017, 4:38 a.m. UTC
pmem_populate() is added to map the specified data PMEM pages to an HVM
domain. No caller is added in this commit.
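
Since no caller exists yet in this series, the sketch below is only an
illustration of how a hypothetical caller might drive pmem_populate() and
its preemption protocol (args.nr_done records progress across restarts;
map_pmem_pages() and its placement are assumptions, not part of this patch):

    static int map_pmem_pages(struct domain *d, unsigned long smfn,
                              unsigned long gfn, unsigned long nr_mfns)
    {
        struct xen_pmem_map_args args = {
            .domain  = d,
            .mfn     = smfn,
            .gfn     = gfn,
            .nr_mfns = nr_mfns,
        };
        int rc;

        for ( ; ; )
        {
            rc = pmem_populate(&args);
            if ( rc != -ERESTART )
                break;
            /*
             * Let pending softirqs run before resuming; a real
             * hypercall handler would instead return a continuation
             * via hypercall_create_continuation().
             */
            process_pending_softirqs();
        }

        return rc;
    }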

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
---
 xen/common/domain.c     |   3 ++
 xen/common/pmem.c       | 141 ++++++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/pmem.h  |  19 +++++++
 xen/include/xen/sched.h |   3 ++
 4 files changed, 166 insertions(+)

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5aebcf265f..4354342b02 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -290,6 +290,9 @@  struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
     INIT_PAGE_LIST_HEAD(&d->page_list);
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
+    spin_lock_init(&d->pmem_lock);
+    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
+
     spin_lock_init(&d->node_affinity_lock);
     d->node_affinity = NODE_MASK_ALL;
     d->auto_node_affinity = 1;
diff --git a/xen/common/pmem.c b/xen/common/pmem.c
index ed4a014c30..2f9ad64a26 100644
--- a/xen/common/pmem.c
+++ b/xen/common/pmem.c
@@ -17,10 +17,12 @@ 
  */
 
 #include <xen/errno.h>
+#include <xen/event.h>
 #include <xen/list.h>
 #include <xen/iocap.h>
 #include <xen/paging.h>
 #include <xen/pmem.h>
+#include <xen/sched.h>
 
 #include <asm/guest_access.h>
 
@@ -78,6 +80,31 @@  static bool check_overlap(unsigned long smfn1, unsigned long emfn1,
            (emfn1 > smfn2 && emfn1 <= emfn2);
 }
 
+static bool check_cover(struct list_head *list,
+                        unsigned long smfn, unsigned long emfn)
+{
+    struct list_head *cur;
+    struct pmem *pmem;
+    unsigned long pmem_smfn, pmem_emfn;
+
+    list_for_each(cur, list)
+    {
+        pmem = list_entry(cur, struct pmem, link);
+        pmem_smfn = pmem->smfn;
+        pmem_emfn = pmem->emfn;
+
+        if ( smfn < pmem_smfn )
+            return false;
+
+        if ( emfn <= pmem_emfn )
+            return true;
+
+        smfn = max(smfn, pmem_emfn);
+    }
+
+    return false;
+}
+
 /**
  * Add a PMEM region to a list. All PMEM regions in the list are
  * sorted in the ascending order of the start address. A PMEM region,
@@ -600,6 +627,120 @@  int pmem_do_sysctl(struct xen_sysctl_nvdimm_op *nvdimm)
 
 #ifdef CONFIG_X86
 
+static int pmem_assign_page(struct domain *d, struct page_info *pg,
+                            unsigned long gfn)
+{
+    int rc;
+
+    if ( pg->count_info != (PGC_state_free | PGC_pmem_page) )
+        return -EBUSY;
+
+    pg->count_info = PGC_allocated | PGC_state_inuse | PGC_pmem_page | 1;
+    pg->u.inuse.type_info = 0;
+    page_set_owner(pg, d);
+
+    rc = guest_physmap_add_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+    if ( rc )
+    {
+        page_set_owner(pg, NULL);
+        pg->count_info = PGC_state_free | PGC_pmem_page;
+
+        return rc;
+    }
+
+    spin_lock(&d->pmem_lock);
+    page_list_add_tail(pg, &d->pmem_page_list);
+    spin_unlock(&d->pmem_lock);
+
+    return 0;
+}
+
+static int pmem_unassign_page(struct domain *d, struct page_info *pg,
+                              unsigned long gfn)
+{
+    int rc;
+
+    spin_lock(&d->pmem_lock);
+    page_list_del(pg, &d->pmem_page_list);
+    spin_unlock(&d->pmem_lock);
+
+    rc = guest_physmap_remove_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+
+    page_set_owner(pg, NULL);
+    pg->count_info = PGC_state_free | PGC_pmem_page;
+
+    return rc;
+}
+
+int pmem_populate(struct xen_pmem_map_args *args)
+{
+    struct domain *d = args->domain;
+    unsigned long i = args->nr_done;
+    unsigned long mfn = args->mfn + i;
+    unsigned long emfn = args->mfn + args->nr_mfns;
+    unsigned long gfn = args->gfn + i;
+    struct page_info *page;
+    int rc = 0, err = 0;
+
+    if ( unlikely(d->is_dying) )
+        return -EINVAL;
+
+    if ( !is_hvm_domain(d) )
+        return -EINVAL;
+
+    spin_lock(&pmem_data_lock);
+
+    if ( !check_cover(&pmem_data_regions, mfn, emfn) )
+    {
+        rc = -ENXIO;
+        goto out;
+    }
+
+    for ( ; mfn < emfn; i++, mfn++, gfn++ )
+    {
+        if ( i != args->nr_done && hypercall_preempt_check() )
+        {
+            args->preempted = 1;
+            rc = -ERESTART;
+            break;
+        }
+
+        page = mfn_to_page(mfn);
+        if ( !page_state_is(page, free) )
+        {
+            rc = -EBUSY;
+            break;
+        }
+
+        rc = pmem_assign_page(d, page, gfn);
+        if ( rc )
+            break;
+    }
+
+ out:
+    if ( rc && rc != -ERESTART )
+        while ( i-- && !err )
+            err = pmem_unassign_page(d, mfn_to_page(--mfn), --gfn);
+
+    spin_unlock(&pmem_data_lock);
+
+    if ( unlikely(err) )
+    {
+        /*
+         * If we unfortunately fail to recover from the previous
+         * failure, some PMEM pages may still be mapped to the
+         * domain. As pmem_populate() is now called only during domain
+         * creation, let's crash the domain.
+         */
+        domain_crash(d);
+        rc = err;
+    }
+
+    args->nr_done = i;
+
+    return rc;
+}
+
 int __init pmem_dom0_setup_permission(struct domain *d)
 {
     struct list_head *cur;
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
index 9323d679a6..2dab90530b 100644
--- a/xen/include/xen/pmem.h
+++ b/xen/include/xen/pmem.h
@@ -33,6 +33,20 @@  int pmem_arch_setup(unsigned long smfn, unsigned long emfn, unsigned int pxm,
                     unsigned long mgmt_smfn, unsigned long mgmt_emfn,
                     unsigned long *used_mgmt_mfns);
 
+struct xen_pmem_map_args {
+    struct domain *domain;
+
+    unsigned long mfn;     /* start MFN of pmem pages to be mapped */
+    unsigned long gfn;     /* start GFN of target domain */
+    unsigned long nr_mfns; /* number of pmem pages to be mapped */
+
+    /* For preemption ... */
+    unsigned long nr_done; /* number of pmem pages processed so far */
+    int preempted;         /* Is the operation preempted? */
+};
+
+int pmem_populate(struct xen_pmem_map_args *args);
+
 #else /* !CONFIG_X86 */
 
 static inline int pmem_dom0_setup_permission(...)
@@ -45,6 +59,11 @@  static inline int pmem_arch_setup(...)
     return -ENOSYS;
 }
 
+static inline int pmem_populate(...)
+{
+    return -ENOSYS;
+}
+
 #endif /* CONFIG_X86 */
 
 #endif /* CONFIG_NVDIMM_PMEM */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5b8f8c68ea..de5b85b1dd 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -323,6 +323,9 @@  struct domain
     atomic_t         shr_pages;       /* number of shared pages             */
     atomic_t         paged_pages;     /* number of paged-out pages          */
 
+    spinlock_t       pmem_lock;       /* protect all following pmem_ fields */
+    struct page_list_head pmem_page_list; /* linked list of PMEM pages      */
+
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
     struct cpupool  *cpupool;
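
For illustration, the containment test implemented by check_cover() can be
exercised outside Xen. The standalone sketch below re-implements the same
walk over a sorted, non-overlapping region list (arrays replace Xen's
struct list_head; all names here are illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    struct region { unsigned long smfn, emfn; };

    /* Is [smfn, emfn) fully covered by the sorted regions in r[]? */
    static bool covered(const struct region *r, unsigned int nr,
                        unsigned long smfn, unsigned long emfn)
    {
        unsigned int i;

        for ( i = 0; i < nr; i++ )
        {
            if ( smfn < r[i].smfn )   /* gap before this region */
                return false;
            if ( emfn <= r[i].emfn )  /* range ends inside this region */
                return true;
            if ( smfn < r[i].emfn )   /* consume this region and go on */
                smfn = r[i].emfn;
        }

        return false;                 /* range extends past the last region */
    }

    int main(void)
    {
        const struct region regions[] = { { 0x100, 0x200 }, { 0x200, 0x300 } };

        /* Spans two adjacent regions: covered (prints 1). */
        printf("%d\n", covered(regions, 2, 0x180, 0x280));
        /* Extends past the last region: not covered (prints 0). */
        printf("%d\n", covered(regions, 2, 0x180, 0x380));

        return 0;
    }

Note that a range touching two regions separated by a gap fails the first
check on the region after the gap, which matches check_cover()'s intent of
only accepting fully-backed data PMEM ranges.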