
[RFC,XEN,v2,05/15] xen/x86: add XENMEM_populate_pmem_map to map host pmem pages to HVM domain

Message ID: 20170320000949.24675-6-haozhong.zhang@intel.com

Commit Message

Haozhong Zhang March 20, 2017, 12:09 a.m. UTC
XENMEM_populate_pmem_map is used by the toolstack to map the specified
host pmem pages to the specified guest physical addresses. Only pmem
pages that have been set up via XEN_SYSCTL_nvdimm_pmem_setup can be
mapped via XENMEM_populate_pmem_map. Because XEN_SYSCTL_nvdimm_pmem_setup
works only on x86, XENMEM_populate_pmem_map is likewise restricted to x86
and returns -ENOSYS on other architectures.
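
For illustration only (not part of this patch), a toolstack caller could
use the new hypercall via the libxc wrapper below roughly as follows; all
frame numbers are made-up values:

    /* Hypothetical usage sketch; all MFNs/GFNs are illustrative. */
    #include <xenctrl.h>

    static int map_pmem(xc_interface *xch, uint32_t domid)
    {
        unsigned long data_smfn = 0x480000, data_emfn = 0x4c0000;
        unsigned long mgmt_smfn = 0x4c0000, mgmt_emfn = 0x4c1000;
        int rc;

        /* Set up the host pmem pages first (earlier in this series). */
        rc = xc_nvdimm_pmem_setup(xch, data_smfn, data_emfn,
                                  mgmt_smfn, mgmt_emfn);
        if ( rc )
            return rc;

        /* Map the data pages starting at guest frame 0x100000. */
        return xc_domain_populate_pmemmap(xch, domid, data_smfn, 0x100000,
                                          data_emfn - data_smfn);
    }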

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>

Changes in v2:
 * Rename *_pmemmap to *_pmem_map.
 * Add XSM check for XENMEM_populate_pmem_map.
 * Add compat code for XENMEM_populate_pmem_map.
 * Add stub for pmem_populate() on non-x86 architectures.
 * Add a check to avoid populating pmem pages into dom0.
 * Merge v1 patch 5 "xen/x86: release pmem pages at domain destroy".
---
 tools/flask/policy/modules/xen.if   |   2 +-
 tools/libxc/include/xenctrl.h       |  17 ++++
 tools/libxc/xc_domain.c             |  15 ++++
 xen/arch/x86/domain.c               |   7 ++
 xen/common/compat/memory.c          |   1 +
 xen/common/domain.c                 |   3 +
 xen/common/memory.c                 |  43 ++++++++++
 xen/common/pmem.c                   | 151 +++++++++++++++++++++++++++++++++++-
 xen/include/public/memory.h         |  14 +++-
 xen/include/xen/pmem.h              |  24 ++++++
 xen/include/xen/sched.h             |   3 +
 xen/include/xsm/dummy.h             |  11 +++
 xen/include/xsm/xsm.h               |  12 +++
 xen/xsm/dummy.c                     |   4 +
 xen/xsm/flask/hooks.c               |  13 ++++
 xen/xsm/flask/policy/access_vectors |   2 +
 16 files changed, 317 insertions(+), 5 deletions(-)

Patch

diff --git a/tools/flask/policy/modules/xen.if b/tools/flask/policy/modules/xen.if
index ed0df4f010..bc4176c089 100644
--- a/tools/flask/policy/modules/xen.if
+++ b/tools/flask/policy/modules/xen.if
@@ -55,7 +55,7 @@  define(`create_domain_common', `
 			psr_cmt_op psr_cat_op soft_reset };
 	allow $1 $2:security check_context;
 	allow $1 $2:shadow enable;
-	allow $1 $2:mmu { map_read map_write adjust memorymap physmap pinpage mmuext_op updatemp };
+	allow $1 $2:mmu { map_read map_write adjust memorymap physmap pinpage mmuext_op updatemp populate_pmem_map };
 	allow $1 $2:grant setup;
 	allow $1 $2:hvm { cacheattr getparam hvmctl sethvmc
 			setparam nested altp2mhvm altp2mhvm_op dm };
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index d4e3002c9e..f8a9581506 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2553,6 +2553,23 @@  int xc_nvdimm_pmem_setup(xc_interface *xch,
                          unsigned long smfn, unsigned long emfn,
                          unsigned long mgmt_smfn, unsigned long mgmt_emfn);
 
+/*
+ * Map host pmem pages to a domain.
+ *
+ * Parameters:
+ *  xch:     xc interface handle
+ *  domid:   the target domain id
+ *  mfn:     start MFN of the host pmem pages to be mapped
+ *  gfn:     start GFN of the target guest physical pages
+ *  nr_mfns: the number of host pmem pages to be mapped
+ *
+ * Return:
+ *  0 on success; non-zero error code for failures.
+ */
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               unsigned long mfn, unsigned long gfn,
+                               unsigned long nr_mfns);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index d862e537d9..9ccdda086d 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2291,6 +2291,21 @@  int xc_domain_soft_reset(xc_interface *xch,
     domctl.domain = (domid_t)domid;
     return do_domctl(xch, &domctl);
 }
+
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               unsigned long mfn, unsigned long gfn,
+                               unsigned long nr_mfns)
+{
+    struct xen_pmem_map args = {
+        .domid   = domid,
+        .mfn     = mfn,
+        .gfn     = gfn,
+        .nr_mfns = nr_mfns,
+    };
+
+    return do_memory_op(xch, XENMEM_populate_pmem_map, &args, sizeof(args));
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 479aee641f..2333603f3e 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -36,6 +36,7 @@ 
 #include <xen/wait.h>
 #include <xen/guest_access.h>
 #include <xen/livepatch.h>
+#include <xen/pmem.h>
 #include <public/sysctl.h>
 #include <public/hvm/hvm_vcpu.h>
 #include <asm/regs.h>
@@ -2352,6 +2353,12 @@  int domain_relinquish_resources(struct domain *d)
         if ( ret )
             return ret;
 
+#ifdef CONFIG_PMEM
+        ret = pmem_teardown(d);
+        if ( ret )
+            return ret;
+#endif /* CONFIG_PMEM */
+
         /* Tear down paging-assistance stuff. */
         ret = paging_teardown(d);
         if ( ret )
diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c
index a37a948331..19382f6dfc 100644
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -523,6 +523,7 @@  int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
         case XENMEM_add_to_physmap:
         case XENMEM_remove_from_physmap:
         case XENMEM_access_op:
+        case XENMEM_populate_pmem_map:
             break;
 
         case XENMEM_get_vnumainfo:
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 4492c9c3d5..f8b4bd9c29 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -287,6 +287,9 @@  struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
     INIT_PAGE_LIST_HEAD(&d->page_list);
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
+    spin_lock_init_prof(d, pmem_lock);
+    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
+
     spin_lock_init(&d->node_affinity_lock);
     d->node_affinity = NODE_MASK_ALL;
     d->auto_node_affinity = 1;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index ad0b33ceb6..0883d2d9b8 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -23,6 +23,7 @@ 
 #include <xen/numa.h>
 #include <xen/mem_access.h>
 #include <xen/trace.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <asm/p2m.h>
@@ -1328,6 +1329,48 @@  long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 #endif
 
+#ifdef CONFIG_PMEM
+    case XENMEM_populate_pmem_map:
+    {
+        struct xen_pmem_map map;
+        struct xen_pmem_map_args args;
+
+        if ( copy_from_guest(&map, arg, 1) )
+            return -EFAULT;
+
+        if ( map.domid == DOMID_SELF )
+            return -EINVAL;
+
+        d = rcu_lock_domain_by_any_id(map.domid);
+        if ( !d )
+            return -EINVAL;
+
+        rc = xsm_populate_pmem_map(XSM_TARGET, curr_d, d);
+        if ( rc )
+        {
+            rcu_unlock_domain(d);
+            return rc;
+        }
+
+        args.domain = d;
+        args.mfn = map.mfn;
+        args.gfn = map.gfn;
+        args.nr_mfns = map.nr_mfns;
+        args.nr_done = start_extent;
+        args.preempted = 0;
+
+        rc = pmem_populate(&args);
+        rcu_unlock_domain(d);
+
+        if ( rc == -ERESTART && args.preempted )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "lh",
+                op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
+
+        break;
+    }
+#endif /* CONFIG_PMEM */
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
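
Note (not part of the patch): the continuation above follows the usual
memory-op pattern, where progress is encoded into the upper bits of the
hypercall command and recovered as start_extent on re-entry. A sketch of
the encoding, using MEMOP_EXTENT_SHIFT and MEMOP_CMD_MASK from
public/memory.h:

    /* Continuation encoding sketch; mirrors do_memory_op()'s decoding. */
    unsigned long cmd = op | (args.nr_done << MEMOP_EXTENT_SHIFT);
    /* ... on re-entry, at the top of do_memory_op(): */
    unsigned long start_extent = cmd >> MEMOP_EXTENT_SHIFT;
    unsigned long subop = cmd & MEMOP_CMD_MASK;

This is why args.nr_done is seeded from start_extent before calling
pmem_populate().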
diff --git a/xen/common/pmem.c b/xen/common/pmem.c
index 0e2d66f94c..03f1c1b374 100644
--- a/xen/common/pmem.c
+++ b/xen/common/pmem.c
@@ -17,9 +17,12 @@ 
  */
 
 #include <xen/errno.h>
+#include <xen/event.h>
 #include <xen/list.h>
 #include <xen/mm.h>
+#include <xen/paging.h>
 #include <xen/pmem.h>
+#include <xen/sched.h>
 #include <xen/spinlock.h>
 
 /*
@@ -130,8 +133,9 @@  static struct pmem *get_first_overlap(const struct list_head *list,
     return overlap;
 }
 
-static bool pmem_list_covered(const struct list_head *list,
-                              unsigned long smfn, unsigned emfn)
+static bool pmem_list_covered_ready(const struct list_head *list,
+                                    unsigned long smfn, unsigned long emfn,
+                                    bool check_ready)
 {
     struct pmem *overlap;
     bool covered = false;
@@ -139,7 +143,8 @@  static bool pmem_list_covered(const struct list_head *list,
     do {
         overlap = get_first_overlap(list, smfn, emfn);
 
-        if ( !overlap || smfn < overlap->smfn )
+        if ( !overlap || smfn < overlap->smfn ||
+             (check_ready && !overlap->ready) )
             break;
 
         if ( emfn <= overlap->emfn )
@@ -155,6 +160,12 @@  static bool pmem_list_covered(const struct list_head *list,
     return covered;
 }
 
+static bool pmem_list_covered(const struct list_head *list,
+                              unsigned long smfn, unsigned long emfn)
+{
+    return pmem_list_covered_ready(list, smfn, emfn, false);
+}
+
 static bool check_mgmt_size(unsigned long mgmt_mfns, unsigned long total_mfns)
 {
     return mgmt_mfns >=
@@ -301,3 +312,137 @@  int pmem_setup(unsigned long data_smfn, unsigned long data_emfn,
  out:
     return rc;
 }
+
+#ifdef CONFIG_X86
+
+static void pmem_assign_page(struct domain *d, struct page_info *pg,
+                             unsigned long gfn)
+{
+    pg->u.inuse.type_info = 0;
+    page_set_owner(pg, d);
+    guest_physmap_add_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+
+    spin_lock(&d->pmem_lock);
+    page_list_add_tail(pg, &d->pmem_page_list);
+    spin_unlock(&d->pmem_lock);
+}
+
+static void pmem_unassign_page(struct domain *d, struct page_info *pg,
+                               unsigned long gfn)
+{
+    spin_lock(&d->pmem_lock);
+    page_list_del(pg, &d->pmem_page_list);
+    spin_unlock(&d->pmem_lock);
+
+    guest_physmap_remove_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+    page_set_owner(pg, NULL);
+    pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free;
+}
+
+static void pmem_unassign_pages(struct domain *d, unsigned long mfn,
+                                unsigned long gfn, unsigned long nr_mfns)
+{
+    unsigned long emfn = mfn + nr_mfns;
+
+    for ( ; mfn < emfn; mfn++, gfn++ )
+        pmem_unassign_page(d, mfn_to_page(mfn), gfn);
+}
+
+/**
+ * Map host pmem pages to a domain. Currently only HVM domains are
+ * supported.
+ *
+ * Parameters:
+ *  args: see the comments of struct xen_pmem_map_args in xen/pmem.h
+ *
+ * Return:
+ *  0 on success; non-zero error code on failures.
+ */
+int pmem_populate(struct xen_pmem_map_args *args)
+{
+    struct domain *d = args->domain;
+    unsigned long i = args->nr_done;
+    unsigned long mfn = args->mfn + i;
+    unsigned long emfn = args->mfn + args->nr_mfns;
+    unsigned long gfn;
+    struct page_info *page;
+    int rc = 0;
+
+    if ( unlikely(d->is_dying) )
+        return -EINVAL;
+
+    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
+        return -EINVAL;
+
+    spin_lock(&pmem_gregions_lock);
+    if ( !pmem_list_covered_ready(&pmem_gregions, mfn, emfn, true) )
+    {
+        spin_unlock(&pmem_gregions_lock);
+        return -EINVAL;
+    }
+    spin_unlock(&pmem_gregions_lock);
+
+    for ( gfn = args->gfn + i; mfn < emfn; i++, mfn++, gfn++ )
+    {
+        if ( i != args->nr_done && hypercall_preempt_check() )
+        {
+            args->preempted = 1;
+            rc = -ERESTART;
+            break;
+        }
+
+        page = mfn_to_page(mfn);
+
+        spin_lock(&pmem_gregions_lock);
+        if ( !page_state_is(page, free) )
+        {
+            dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx not in free state\n", mfn);
+            spin_unlock(&pmem_gregions_lock);
+            rc = -EINVAL;
+            break;
+        }
+        page->count_info = PGC_state_inuse | 1;
+        spin_unlock(&pmem_gregions_lock);
+
+        pmem_assign_page(d, page, gfn);
+    }
+
+    if ( rc && rc != -ERESTART )
+        pmem_unassign_pages(d, args->mfn, args->gfn, i);
+
+    args->nr_done = i;
+    return rc;
+}
+
+int pmem_teardown(struct domain *d)
+{
+    struct page_info *pg, *next;
+    int rc = 0;
+
+    ASSERT(d->is_dying);
+    ASSERT(d != current->domain);
+
+    spin_lock(&d->pmem_lock);
+
+    page_list_for_each_safe ( pg, next, &d->pmem_page_list )
+    {
+        BUG_ON(page_get_owner(pg) != d);
+        BUG_ON(page_state_is(pg, free));
+
+        page_list_del(pg, &d->pmem_page_list);
+        page_set_owner(pg, NULL);
+        pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free;
+
+        if ( hypercall_preempt_check() )
+        {
+            rc = -ERESTART;
+            break;
+        }
+    }
+
+    spin_unlock(&d->pmem_lock);
+
+    return rc;
+}
+
+#endif /* CONFIG_X86 */
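
Note (not part of the patch): pmem_list_covered_ready() assumes the region
list is sorted by start MFN and non-overlapping. Reduced to a standalone
sketch, with struct region standing in for this file's struct pmem, the
coverage test amounts to:

    #include <stdbool.h>

    /* Simplified sketch: is [smfn, emfn) fully covered by ready regions?
     * Assumes r[] is sorted by smfn and entries do not overlap. */
    struct region { unsigned long smfn, emfn; bool ready; };

    static bool covered(const struct region *r, unsigned int nr,
                        unsigned long smfn, unsigned long emfn)
    {
        unsigned int i;

        for ( i = 0; i < nr && smfn < emfn; i++ )
        {
            if ( r[i].emfn <= smfn )
                continue;            /* region entirely below the range */
            if ( smfn < r[i].smfn || !r[i].ready )
                return false;        /* gap, or overlapping region not ready */
            smfn = r[i].emfn;        /* advance past the covered part */
        }

        return smfn >= emfn;
    }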
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 6eee0c8a16..fa636b313a 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -648,7 +648,19 @@  struct xen_vnuma_topology_info {
 typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
 DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
 
-/* Next available subop number is 28 */
+#define XENMEM_populate_pmem_map 28
+
+struct xen_pmem_map {
+    /* IN */
+    domid_t domid;
+    unsigned long mfn;
+    unsigned long gfn;
+    unsigned long nr_mfns;
+};
+typedef struct xen_pmem_map xen_pmem_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmem_map_t);
+
+/* Next available subop number is 29 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
index 95c8207ff6..cbc621048b 100644
--- a/xen/include/xen/pmem.h
+++ b/xen/include/xen/pmem.h
@@ -26,9 +26,23 @@  int pmem_register(unsigned long smfn, unsigned long emfn);
 int pmem_setup(unsigned long data_spfn, unsigned long data_emfn,
                unsigned long mgmt_smfn, unsigned long mgmt_emfn);
 
+struct xen_pmem_map_args {
+    struct domain *domain;
+
+    unsigned long mfn;     /* start MFN of pmem pages to be mapped */
+    unsigned long gfn;     /* start GFN of target domain */
+    unsigned long nr_mfns; /* number of pmem pages to be mapped */
+
+    /* For preemption ... */
+    unsigned long nr_done; /* number of pmem pages processed so far */
+    int preempted;         /* Is the operation preempted? */
+};
+
 #ifdef CONFIG_X86
 int pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
                     unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+int pmem_populate(struct xen_pmem_map_args *args);
+int pmem_teardown(struct domain *d);
 #else /* !CONFIG_X86 */
 static inline int
 pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
@@ -36,6 +50,16 @@  pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
 {
     return -ENOSYS;
 }
+
+static inline int pmem_populate(struct xen_pmem_map_args *args)
+{
+    return -ENOSYS;
+}
+
+static inline int pmem_teardown(struct domain *d)
+{
+    return -ENOSYS;
+}
 #endif /* CONFIG_X86 */
 
 #endif /* CONFIG_PMEM */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 0929c0b910..39057243d6 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -336,6 +336,9 @@  struct domain
     atomic_t         shr_pages;       /* number of shared pages             */
     atomic_t         paged_pages;     /* number of paged-out pages          */
 
+    spinlock_t       pmem_lock;       /* protects the pmem_* fields below  */
+    struct page_list_head pmem_page_list; /* linked list of pmem pages      */
+
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
     struct cpupool  *cpupool;
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index 4b27ae72de..aea0b9376f 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -728,3 +728,14 @@  static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG uint32_t op)
         return xsm_default_action(XSM_PRIV, current->domain, NULL);
     }
 }
+
+#ifdef CONFIG_PMEM
+
+static XSM_INLINE int xsm_populate_pmem_map(XSM_DEFAULT_ARG
+                                            struct domain *d1, struct domain *d2)
+{
+    XSM_ASSERT_ACTION(XSM_TARGET);
+    return xsm_default_action(action, d1, d2);
+}
+
+#endif /* CONFIG_PMEM */
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 2cf7ac10db..8f62b21739 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -182,6 +182,10 @@  struct xsm_operations {
     int (*dm_op) (struct domain *d);
 #endif
     int (*xen_version) (uint32_t cmd);
+
+#ifdef CONFIG_PMEM
+    int (*populate_pmem_map) (struct domain *d1, struct domain *d2);
+#endif /* CONFIG_PMEM */
 };
 
 #ifdef CONFIG_XSM
@@ -705,6 +709,14 @@  static inline int xsm_xen_version (xsm_default_t def, uint32_t op)
     return xsm_ops->xen_version(op);
 }
 
+#ifdef CONFIG_PMEM
+static inline int xsm_populate_pmem_map(xsm_default_t def,
+                                        struct domain *d1, struct domain *d2)
+{
+    return xsm_ops->populate_pmem_map(d1, d2);
+}
+#endif /* CONFIG_PMEM */
+
 #endif /* XSM_NO_WRAPPERS */
 
 #ifdef CONFIG_MULTIBOOT
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 3cb5492dd3..dde68ecf59 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -159,4 +159,8 @@  void __init xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, dm_op);
 #endif
     set_to_dummy_if_null(ops, xen_version);
+
+#ifdef CONFIG_PMEM
+    set_to_dummy_if_null(ops, populate_pmem_map);
+#endif
 }
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index e3c77bbe3f..582ddf81d3 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1659,6 +1659,15 @@  static int flask_xen_version (uint32_t op)
     }
 }
 
+#ifdef CONFIG_PMEM
+
+static int flask_populate_pmem_map(struct domain *d1, struct domain *d2)
+{
+    return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__POPULATE_PMEM_MAP);
+}
+
+#endif /* CONFIG_PMEM */
+
 long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
 
@@ -1794,6 +1803,10 @@  static struct xsm_operations flask_ops = {
     .dm_op = flask_dm_op,
 #endif
     .xen_version = flask_xen_version,
+
+#ifdef CONFIG_PMEM
+    .populate_pmem_map = flask_populate_pmem_map,
+#endif /* CONFIG_PMEM */
 };
 
 void __init flask_init(const void *policy_buffer, size_t policy_size)
diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors
index a8ddd7ca84..44cbd66f4d 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -385,6 +385,8 @@  class mmu
 # Allow a privileged domain to install a map of a page it does not own.  Used
 # for stub domain device models with the PV framebuffer.
     target_hack
+# XENMEM_populate_pmem_map
+    populate_pmem_map
 }
 
 # control of the paging_domctl split by subop