--- a/tools/flask/policy/modules/xen.if
+++ b/tools/flask/policy/modules/xen.if
@@ -55,7 +55,7 @@ define(`create_domain_common', `
psr_cmt_op psr_cat_op soft_reset };
allow $1 $2:security check_context;
allow $1 $2:shadow enable;
- allow $1 $2:mmu { map_read map_write adjust memorymap physmap pinpage mmuext_op updatemp };
+ allow $1 $2:mmu { map_read map_write adjust memorymap physmap pinpage mmuext_op updatemp populate_pmem_map };
allow $1 $2:grant setup;
allow $1 $2:hvm { cacheattr getparam hvmctl sethvmc
setparam nested altp2mhvm altp2mhvm_op dm };
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2553,6 +2553,23 @@ int xc_nvdimm_pmem_setup(xc_interface *xch,
unsigned long smfn, unsigned long emfn,
unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+/*
+ * Map host pmem pages to a domain.
+ *
+ * Parameters:
+ * xch: xc interface handler
+ * domid: the target domain id
+ * mfn: start MFN of the host pmem pages to be mapped
+ * gfn: start GFN of the target guest physical pages
+ * nr_mfns: the number of host pmem pages to be mapped
+ *
+ * Return:
+ * 0 on success; non-zero error code on failure.
+ */
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+ unsigned long mfn, unsigned long gfn,
+ unsigned long nr_mfns);
+
/* Compat shims */
#include "xenctrl_compat.h"
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2291,6 +2291,21 @@ int xc_domain_soft_reset(xc_interface *xch,
domctl.domain = (domid_t)domid;
return do_domctl(xch, &domctl);
}
+
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+ unsigned long mfn, unsigned long gfn,
+ unsigned long nr_mfns)
+{
+ struct xen_pmem_map args = {
+ .domid = domid,
+ .mfn = mfn,
+ .gfn = gfn,
+ .nr_mfns = nr_mfns,
+ };
+
+ return do_memory_op(xch, XENMEM_populate_pmem_map, &args, sizeof(args));
+}
+
/*
* Local variables:
* mode: C
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -36,6 +36,7 @@
#include <xen/wait.h>
#include <xen/guest_access.h>
#include <xen/livepatch.h>
+#include <xen/pmem.h>
#include <public/sysctl.h>
#include <public/hvm/hvm_vcpu.h>
#include <asm/regs.h>
@@ -2352,6 +2353,12 @@ int domain_relinquish_resources(struct domain *d)
if ( ret )
return ret;
+#ifdef CONFIG_PMEM
+ ret = pmem_teardown(d);
+ if ( ret )
+ return ret;
+#endif /* CONFIG_PMEM */
+
/* Tear down paging-assistance stuff. */
ret = paging_teardown(d);
if ( ret )
--- a/xen/common/compat/memory.c
+++ b/xen/common/compat/memory.c
@@ -523,6 +523,7 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) compat)
case XENMEM_add_to_physmap:
case XENMEM_remove_from_physmap:
case XENMEM_access_op:
+ case XENMEM_populate_pmem_map:
break;
case XENMEM_get_vnumainfo:
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -287,6 +287,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
INIT_PAGE_LIST_HEAD(&d->page_list);
INIT_PAGE_LIST_HEAD(&d->xenpage_list);
+ spin_lock_init_prof(d, pmem_lock);
+ INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
+
spin_lock_init(&d->node_affinity_lock);
d->node_affinity = NODE_MASK_ALL;
d->auto_node_affinity = 1;
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -23,6 +23,7 @@
#include <xen/numa.h>
#include <xen/mem_access.h>
#include <xen/trace.h>
+#include <xen/pmem.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <asm/p2m.h>
@@ -1328,6 +1329,48 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
}
#endif
+#ifdef CONFIG_PMEM
+ case XENMEM_populate_pmem_map:
+ {
+ struct xen_pmem_map map;
+ struct xen_pmem_map_args args;
+
+ if ( copy_from_guest(&map, arg, 1) )
+ return -EFAULT;
+
+ if ( map.domid == DOMID_SELF )
+ return -EINVAL;
+
+ d = rcu_lock_domain_by_any_id(map.domid);
+ if ( !d )
+ return -EINVAL;
+
+ rc = xsm_populate_pmem_map(XSM_TARGET, curr_d, d);
+ if ( rc )
+ {
+ rcu_unlock_domain(d);
+ return rc;
+ }
+
+ args.domain = d;
+ args.mfn = map.mfn;
+ args.gfn = map.gfn;
+ args.nr_mfns = map.nr_mfns;
+ args.nr_done = start_extent;
+ args.preempted = 0;
+
+ rc = pmem_populate(&args);
+ rcu_unlock_domain(d);
+
+ if ( rc == -ERESTART && args.preempted )
+ return hypercall_create_continuation(
+ __HYPERVISOR_memory_op, "lh",
+ op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
+
+ break;
+ }
+#endif /* CONFIG_PMEM */
+
default:
rc = arch_memory_op(cmd, arg);
break;
--- a/xen/common/pmem.c
+++ b/xen/common/pmem.c
@@ -17,9 +17,12 @@
*/
#include <xen/errno.h>
+#include <xen/event.h>
#include <xen/list.h>
#include <xen/mm.h>
+#include <xen/paging.h>
#include <xen/pmem.h>
+#include <xen/sched.h>
#include <xen/spinlock.h>
/*
@@ -130,8 +133,9 @@ static struct pmem *get_first_overlap(const struct list_head *list,
return overlap;
}
-static bool pmem_list_covered(const struct list_head *list,
- unsigned long smfn, unsigned emfn)
+static bool pmem_list_covered_ready(const struct list_head *list,
+ unsigned long smfn, unsigned emfn,
+ bool check_ready)
{
struct pmem *overlap;
bool covered = false;
@@ -139,7 +143,8 @@ static bool pmem_list_covered(const struct list_head *list,
do {
overlap = get_first_overlap(list, smfn, emfn);
- if ( !overlap || smfn < overlap->smfn )
+ if ( !overlap || smfn < overlap->smfn ||
+ (check_ready && !overlap->ready) )
break;
if ( emfn <= overlap->emfn )
@@ -155,6 +160,12 @@ static bool pmem_list_covered(const struct list_head *list,
return covered;
}
+static bool pmem_list_covered(const struct list_head *list,
+ unsigned long smfn, unsigned emfn)
+{
+ return pmem_list_covered_ready(list, smfn, emfn, false);
+}
+
static bool check_mgmt_size(unsigned long mgmt_mfns, unsigned long total_mfns)
{
return mgmt_mfns >=
@@ -301,3 +312,137 @@ int pmem_setup(unsigned long data_smfn, unsigned long data_emfn,
out:
return rc;
}
+
+#ifdef CONFIG_X86
+
+static void pmem_assign_page(struct domain *d, struct page_info *pg,
+ unsigned long gfn)
+{
+ pg->u.inuse.type_info = 0;
+ page_set_owner(pg, d);
+ guest_physmap_add_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+
+ spin_lock(&d->pmem_lock);
+ page_list_add_tail(pg, &d->pmem_page_list);
+ spin_unlock(&d->pmem_lock);
+}
+
+static void pmem_unassign_page(struct domain *d, struct page_info *pg,
+ unsigned long gfn)
+{
+ spin_lock(&d->pmem_lock);
+ page_list_del(pg, &d->pmem_page_list);
+ spin_unlock(&d->pmem_lock);
+
+ guest_physmap_remove_page(d, _gfn(gfn), _mfn(page_to_mfn(pg)), 0);
+ page_set_owner(pg, NULL);
+ pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free;
+}
+
+static void pmem_unassign_pages(struct domain *d, unsigned long mfn,
+ unsigned long gfn, unsigned long nr_mfns)
+{
+ unsigned long emfn = mfn + nr_mfns;
+
+ for ( ; mfn < emfn; mfn++, gfn++ )
+ pmem_unassign_page(d, mfn_to_page(mfn), gfn);
+}
+
+/**
+ * Map host pmem pages to a domain. Currently only HVM domains are
+ * supported.
+ *
+ * Parameters:
+ * args: please refer to the comments of struct xen_pmem_map_args in xen/pmem.h
+ *
+ * Return:
+ * 0 on success; non-zero error code on failure.
+ */
+int pmem_populate(struct xen_pmem_map_args *args)
+{
+ struct domain *d = args->domain;
+ unsigned long i = args->nr_done;
+ unsigned long mfn = args->mfn + i;
+ unsigned long emfn = args->mfn + args->nr_mfns;
+ unsigned long gfn;
+ struct page_info *page;
+ int rc = 0;
+
+ if ( unlikely(d->is_dying) )
+ return -EINVAL;
+
+ if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
+ return -EINVAL;
+
+ spin_lock(&pmem_gregions_lock);
+ if ( !pmem_list_covered_ready(&pmem_gregions, mfn, emfn, true) )
+ {
+ spin_unlock(&pmem_gregions_lock);
+ return -EINVAL;
+ }
+ spin_unlock(&pmem_gregions_lock);
+
+ for ( gfn = args->gfn + i; mfn < emfn; i++, mfn++, gfn++ )
+ {
+ if ( i != args->nr_done && hypercall_preempt_check() )
+ {
+ args->preempted = 1;
+ rc = -ERESTART;
+ break;
+ }
+
+ page = mfn_to_page(mfn);
+
+ spin_lock(&pmem_gregions_lock);
+ if ( !page_state_is(page, free) )
+ {
+ dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx not in free state\n", mfn);
+ spin_unlock(&pmem_gregions_lock);
+ rc = -EINVAL;
+ break;
+ }
+ page->count_info = PGC_state_inuse | 1;
+ spin_unlock(&pmem_gregions_lock);
+
+ pmem_assign_page(d, page, gfn);
+ }
+
+ if ( rc && rc != -ERESTART )
+ pmem_unassign_pages(d, args->mfn, args->gfn, i);
+
+ args->nr_done = i;
+ return rc;
+}
+
+int pmem_teardown(struct domain *d)
+{
+ struct page_info *pg, *next;
+ int rc = 0;
+
+ ASSERT(d->is_dying);
+ ASSERT(d != current->domain);
+
+ spin_lock(&d->pmem_lock);
+
+ page_list_for_each_safe ( pg, next, &d->pmem_page_list )
+ {
+ BUG_ON(page_get_owner(pg) != d);
+ BUG_ON(page_state_is(pg, free));
+
+ page_list_del(pg, &d->pmem_page_list);
+ page_set_owner(pg, NULL);
+ pg->count_info = (pg->count_info & ~PGC_count_mask) | PGC_state_free;
+
+ if ( hypercall_preempt_check() )
+ {
+ rc = -ERESTART;
+ break;
+ }
+ }
+
+ spin_unlock(&d->pmem_lock);
+
+ return rc;
+}
+
+#endif /* CONFIG_X86 */
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -648,7 +648,19 @@ struct xen_vnuma_topology_info {
typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
-/* Next available subop number is 28 */
+#define XENMEM_populate_pmem_map 28
+
+struct xen_pmem_map {
+ /* IN */
+ domid_t domid;
+ unsigned long mfn;
+ unsigned long gfn;
+ unsigned int nr_mfns;
+};
+typedef struct xen_pmem_map xen_pmem_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmem_map_t);
+
+/* Next available subop number is 29 */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
--- a/xen/include/xen/pmem.h
+++ b/xen/include/xen/pmem.h
@@ -26,9 +26,23 @@ int pmem_register(unsigned long smfn, unsigned long emfn);
int pmem_setup(unsigned long data_spfn, unsigned long data_emfn,
unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+struct xen_pmem_map_args {
+ struct domain *domain;
+
+ unsigned long mfn; /* start MFN of pmem pages to be mapped */
+ unsigned long gfn; /* start GFN of target domain */
+ unsigned long nr_mfns; /* number of pmem pages to be mapped */
+
+ /* For preemption ... */
+ unsigned long nr_done; /* number of pmem pages processed so far */
+ int preempted; /* Is the operation preempted? */
+};
+
#ifdef CONFIG_X86
int pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
unsigned long mgmt_smfn, unsigned long mgmt_emfn);
+int pmem_populate(struct xen_pmem_map_args *args);
+int pmem_teardown(struct domain *d);
#else /* !CONFIG_X86 */
static inline int
pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
@@ -36,6 +50,16 @@ pmem_arch_setup(unsigned long data_smfn, unsigned long data_emfn,
{
return -ENOSYS;
}
+
+static inline int pmem_populate(struct xen_pmem_map_args *args)
+{
+ return -ENOSYS;
+}
+
+static inline int pmem_teardown(struct domain *d)
+{
+ return -ENOSYS;
+}
#endif /* CONFIG_X86 */
#endif /* CONFIG_PMEM */
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -336,6 +336,9 @@ struct domain
atomic_t shr_pages; /* number of shared pages */
atomic_t paged_pages; /* number of paged-out pages */
+ spinlock_t pmem_lock; /* protect all following pmem_ fields */
+ struct page_list_head pmem_page_list; /* linked list of pmem pages */
+
/* Scheduling. */
void *sched_priv; /* scheduler-specific data */
struct cpupool *cpupool;
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -728,3 +728,14 @@ static XSM_INLINE int xsm_xen_version (XSM_DEFAULT_ARG uint32_t op)
return xsm_default_action(XSM_PRIV, current->domain, NULL);
}
}
+
+#ifdef CONFIG_PMEM
+
+static XSM_INLINE int xsm_populate_pmem_map(XSM_DEFAULT_ARG
+ struct domain *d1, struct domain *d2)
+{
+ XSM_ASSERT_ACTION(XSM_TARGET);
+ return xsm_default_action(action, d1, d2);
+}
+
+#endif /* CONFIG_PMEM */
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -182,6 +182,10 @@ struct xsm_operations {
int (*dm_op) (struct domain *d);
#endif
int (*xen_version) (uint32_t cmd);
+
+#ifdef CONFIG_PMEM
+ int (*populate_pmem_map) (struct domain *d1, struct domain *d2);
+#endif /* CONFIG_PMEM */
};
#ifdef CONFIG_XSM
@@ -705,6 +709,14 @@ static inline int xsm_xen_version (xsm_default_t def, uint32_t op)
return xsm_ops->xen_version(op);
}
+#ifdef CONFIG_PMEM
+static inline int xsm_populate_pmem_map(xsm_default_t def,
+ struct domain *d1, struct domain *d2)
+{
+ return xsm_ops->populate_pmem_map(d1, d2);
+}
+#endif /* CONFIG_PMEM */
+
#endif /* XSM_NO_WRAPPERS */
#ifdef CONFIG_MULTIBOOT
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -159,4 +159,8 @@ void __init xsm_fixup_ops (struct xsm_operations *ops)
set_to_dummy_if_null(ops, dm_op);
#endif
set_to_dummy_if_null(ops, xen_version);
+
+#ifdef CONFIG_PMEM
+ set_to_dummy_if_null(ops, populate_pmem_map);
+#endif
}
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1659,6 +1659,15 @@ static int flask_xen_version (uint32_t op)
}
}
+#ifdef CONFIG_PMEM
+
+static int flask_populate_pmem_map(struct domain *d1, struct domain *d2)
+{
+ return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__POPULATE_PMEM_MAP);
+}
+
+#endif /* CONFIG_PMEM */
+
long do_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
int compat_flask_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_flask_op);
@@ -1794,6 +1803,10 @@ static struct xsm_operations flask_ops = {
.dm_op = flask_dm_op,
#endif
.xen_version = flask_xen_version,
+
+#ifdef CONFIG_PMEM
+ .populate_pmem_map = flask_populate_pmem_map,
+#endif /* CONFIG_PMEM */
};
void __init flask_init(const void *policy_buffer, size_t policy_size)
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -385,6 +385,8 @@ class mmu
# Allow a privileged domain to install a map of a page it does not own. Used
# for stub domain device models with the PV framebuffer.
target_hack
+# XENMEM_populate_pmem_map
+ populate_pmem_map
}
# control of the paging_domctl split by subop

XENMEM_populate_pmem_map is used by the toolstack to map the specified
host pmem pages to the specified guest physical addresses. Only pmem
pages that have been set up via XEN_SYSCTL_nvdimm_pmem_setup can be
mapped via XENMEM_populate_pmem_map. Because XEN_SYSCTL_nvdimm_pmem_setup
only works on x86, XENMEM_populate_pmem_map is made to work only on x86
as well and returns -ENOSYS on other architectures.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>

Changes in v2:
 * Rename *_pmemmap to *_pmem_map.
 * Add XSM check for XENMEM_populate_pmem_map.
 * Add compat code for XENMEM_populate_pmem_map.
 * Add a stub for pmem_populate() on non-x86 architectures.
 * Add a check to avoid populating pmem pages to dom0.
 * Merge v1 patch 5 "xen/x86: release pmem pages at domain destroy".
---
 tools/flask/policy/modules/xen.if   |   2 +-
 tools/libxc/include/xenctrl.h       |  17 ++++
 tools/libxc/xc_domain.c             |  15 ++++
 xen/arch/x86/domain.c               |   7 ++
 xen/common/compat/memory.c          |   1 +
 xen/common/domain.c                 |   3 +
 xen/common/memory.c                 |  43 ++++++++++
 xen/common/pmem.c                   | 151 +++++++++++++++++++++++++++++++++++-
 xen/include/public/memory.h         |  14 +++-
 xen/include/xen/pmem.h              |  24 ++++++
 xen/include/xen/sched.h             |   3 +
 xen/include/xsm/dummy.h             |  11 +++
 xen/include/xsm/xsm.h               |  12 +++
 xen/xsm/dummy.c                     |   4 +
 xen/xsm/flask/hooks.c               |  13 ++++
 xen/xsm/flask/policy/access_vectors |   2 +
 16 files changed, 317 insertions(+), 5 deletions(-)
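
For illustration, here is a minimal sketch of how a toolstack component
might drive the new interface through the libxc wrapper added above. The
function name and the MFN/GFN/page-count values are hypothetical, and the
snippet assumes the host pmem region has already been registered and set
up via xc_nvdimm_pmem_setup():

    /* Illustrative sketch only -- not part of this patch. */
    #include <stdint.h>
    #include <stdio.h>
    #include <xenctrl.h>

    static int map_pmem_to_guest(xc_interface *xch, uint32_t domid)
    {
        unsigned long pmem_smfn = 0x480000;  /* assumed host pmem start MFN */
        unsigned long guest_gfn = 0x100000;  /* assumed guest start GFN */
        unsigned long nr_pages  = 0x40000;   /* 1 GiB in 4 KiB pages */
        int rc;

        /* Issues XENMEM_populate_pmem_map for the whole range. */
        rc = xc_domain_populate_pmemmap(xch, domid, pmem_smfn, guest_gfn,
                                        nr_pages);
        if ( rc )
            fprintf(stderr, "failed to map pmem pages: %d\n", rc);

        return rc;
    }

Large ranges are handled like other XENMEM_* subops: pmem_populate() may
return -ERESTART, in which case do_memory_op() creates a hypercall
continuation and the nr_done/preempted fields of struct xen_pmem_map_args
let the operation resume where it stopped.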