@@ -1913,7 +1913,7 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
}
#endif
- /* Spurious fault? PoD and log-dirty also take this path. */
+ /* Spurious fault? PoD, log-dirty and VM forking also take this path. */
if ( p2m_is_ram(p2mt) )
{
rc = 1;
@@ -22,11 +22,13 @@
#include <xen/types.h>
#include <xen/domain_page.h>
+#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/rwlock.h>
#include <xen/mm.h>
#include <xen/grant_table.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/vm_event.h>
@@ -36,6 +38,9 @@
#include <asm/altp2m.h>
#include <asm/atomic.h>
#include <asm/event.h>
+#include <asm/hap.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/save.h>
#include <xsm/xsm.h>
#include "mm-locks.h"
@@ -1423,6 +1428,200 @@ static inline int mem_sharing_control(struct domain *d, bool enable)
return 0;
}
+/*
+ * Populate a missing p2m entry of a forked domain from its parent chain.
+ *
+ * Forking a page only gets called when the VM faults due to no entry being
+ * in the EPT for the access. Depending on the type of access we either
+ * populate the physmap with a shared entry for read-only access or
+ * fork the page if it's a write access.
+ *
+ * The client p2m is already locked so we only need to lock
+ * the parent's here.
+ *
+ * Parameters:
+ *  d         - the forked (client) domain whose gfn is being populated.
+ *  gfn       - guest frame number to populate.
+ *  unsharing - true for a write access: skip the shared-entry attempt and
+ *              copy the page straight away.
+ *
+ * Returns:
+ *  0        - a shared entry was added on the read-only path.
+ *  -ENOENT  - d is not a fork, or no domain in the parent chain has a valid
+ *             RAM-type mfn for gfn.
+ *  -ENOMEM  - no page could be allocated for the copy.
+ *  otherwise the result of p2m->set_entry() for the newly copied page.
+ */
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn, bool unsharing)
+{
+ int rc = -ENOENT;
+ shr_handle_t handle;
+ struct domain *parent;
+ struct p2m_domain *p2m;
+ unsigned long gfn_l = gfn_x(gfn);
+ mfn_t mfn, new_mfn;
+ p2m_type_t p2mt;
+ struct page_info *page;
+
+ if ( !mem_sharing_is_fork(d) )
+ return -ENOENT;
+
+ parent = d->parent;
+
+ if ( !unsharing )
+ {
+ /*
+ * For read-only accesses we just add a shared entry to the physmap.
+ * Walk up the parent chain until some ancestor accepts the nomination
+ * of gfn; rc keeps the last nominate_page() result.
+ */
+ while ( parent )
+ {
+ if ( !(rc = nominate_page(parent, gfn, 0, &handle)) )
+ break;
+
+ parent = parent->parent;
+ }
+
+ if ( !rc )
+ {
+ /*
+ * The client's p2m is already locked; only the p2m of the ancestor
+ * whose page was nominated is locked around add_to_physmap().
+ */
+ struct p2m_domain *pp2m = p2m_get_hostp2m(parent);
+
+ p2m_lock(pp2m);
+ rc = add_to_physmap(parent, gfn_l, handle, d, gfn_l, false);
+ p2m_unlock(pp2m);
+
+ if ( !rc )
+ return 0;
+ }
+ }
+
+ /*
+ * If it's a write access (ie. unsharing) or if adding a shared entry to
+ * the physmap failed we'll fork the page directly.
+ *
+ * The search restarts from d->parent. The loop exits either with parent
+ * NULL (nothing found) or with the gfn reference on the matching ancestor
+ * still held; that reference is dropped by the put_gfn() calls below.
+ */
+ p2m = p2m_get_hostp2m(d);
+ parent = d->parent;
+
+ while ( parent )
+ {
+ mfn = get_gfn_query(parent, gfn_l, &p2mt);
+
+ if ( mfn_valid(mfn) && p2m_is_any_ram(p2mt) )
+ break;
+
+ put_gfn(parent, gfn_l);
+ parent = parent->parent;
+ }
+
+ if ( !parent )
+ return -ENOENT;
+
+ if ( !(page = alloc_domheap_page(d, 0)) )
+ {
+ put_gfn(parent, gfn_l);
+ return -ENOMEM;
+ }
+
+ new_mfn = page_to_mfn(page);
+ copy_domain_page(new_mfn, mfn);
+ set_gpfn_from_mfn(mfn_x(new_mfn), gfn_l);
+
+ put_gfn(parent, gfn_l);
+
+ return p2m->set_entry(p2m, gfn, new_mfn, PAGE_ORDER_4K, p2m_ram_rw,
+ p2m->default_access, -1);
+}
+
+/*
+ * Move the fork (cd) into the given cpupool and create every vCPU in
+ * [0, cd->max_vcpus) that does not exist yet, then refresh the domain's
+ * node affinity.
+ *
+ * Returns 0 on success, the error from cpupool_move_domain(), or -EINVAL
+ * if vcpu_create() fails for any vCPU.
+ */
+static int bring_up_vcpus(struct domain *cd, struct cpupool *cpupool)
+{
+    unsigned int vcpu_id;
+    int rc = cpupool_move_domain(cd, cpupool);
+
+    if ( rc )
+        return rc;
+
+    for ( vcpu_id = 0; vcpu_id < cd->max_vcpus; vcpu_id++ )
+    {
+        /* Already existing vCPUs are left alone; only missing ones are made. */
+        if ( !cd->vcpu[vcpu_id] && !vcpu_create(cd, vcpu_id) )
+            return -EINVAL;
+    }
+
+    domain_update_node_affinity(cd);
+
+    return 0;
+}
+
+/*
+ * Make the HAP allocation of the fork (cd) match that of the parent (d).
+ *
+ * Nothing is done when both domains already report the same allocation.
+ * Otherwise the fork's allocation is changed under its paging lock.
+ *
+ * Returns 0 on success (or when no change was needed), the error returned
+ * by hap_set_allocation(), or -ERESTART when the operation was preempted
+ * and has to be continued.
+ */
+static int fork_hap_allocation(struct domain *d, struct domain *cd)
+{
+    int rc;
+    /*
+     * Start from "not preempted": the flag is tested below, so it must not
+     * hold an indeterminate value should hap_set_allocation() leave it
+     * untouched on paths where no preemption happened.
+     */
+    bool preempted = false;
+    unsigned long mb = hap_get_allocation(d);
+
+    if ( mb == hap_get_allocation(cd) )
+        return 0;
+
+    paging_lock(cd);
+    /* hap_get_allocation() result is in MB here; convert to pages. */
+    rc = hap_set_allocation(cd, mb << (20 - PAGE_SHIFT), &preempted);
+    paging_unlock(cd);
+
+    if ( rc )
+        return rc;
+
+    if ( preempted )
+        return -ERESTART;
+
+    return 0;
+}
+
+/*
+ * Copy the HVM state of the parent (d) into the fork (cd).
+ *
+ * Three things are transferred, in this order:
+ *  1. every HVM param of d that can be read and is non-zero,
+ *  2. the TSC settings reported by tsc_get_info(),
+ *  3. the HVM context saved from d via hvm_save() and loaded into cd via
+ *     hvm_load().
+ *
+ * Returns 0 on success, -ENOMEM if the save buffer cannot be allocated, or
+ * the first error returned by hvm_set_param(), hvm_save() or hvm_load().
+ * The save buffer is freed on every path past its allocation.
+ */
+static int fork_hvm(struct domain *d, struct domain *cd)
+{
+    int rc, i;
+    struct hvm_domain_context c = { 0 };
+    uint32_t tsc_mode;
+    uint32_t gtsc_khz;
+    uint32_t incarnation;
+    uint64_t elapsed_nsec;
+
+    c.size = hvm_save_size(d);
+    if ( (c.data = xmalloc_bytes(c.size)) == NULL )
+        return -ENOMEM;
+
+    for ( i = 0; i < HVM_NR_PARAMS; i++ )
+    {
+        uint64_t value = 0;
+
+        /* Params that cannot be read, or that are zero, are not copied. */
+        if ( hvm_get_param(d, i, &value) || !value )
+            continue;
+
+        if ( (rc = hvm_set_param(cd, i, value)) )
+            goto out;
+    }
+
+    tsc_get_info(d, &tsc_mode, &elapsed_nsec, &gtsc_khz, &incarnation);
+    tsc_set_info(cd, tsc_mode, elapsed_nsec, gtsc_khz, incarnation);
+
+    if ( (rc = hvm_save(d, &c)) )
+        goto out;
+
+    /*
+     * Reset the context cursor before loading; presumably hvm_save()
+     * advanced it while writing - confirm against hvm_save().
+     */
+    c.cur = 0;
+    rc = hvm_load(cd, &c);
+
+ out:
+    xfree(c.data);
+    return rc;
+}
+
+/*
+ * Turn cd into a fork of d.
+ *
+ * The parent is paused through the system controller interface unless its
+ * controller_pause_count is already non-zero. The fork then inherits
+ * max_pages and max_vcpus, the HAP allocation, the vCPUs (in the parent's
+ * cpupool) and the HVM state. cd->parent is only set once all of that has
+ * succeeded.
+ *
+ * Returns 0 on success or the first error hit by any of the steps; this
+ * includes -ERESTART from fork_hap_allocation(). Note that the parent is
+ * not unpaused on any return path of this function.
+ */
+static int mem_sharing_fork(struct domain *d, struct domain *cd)
+{
+    int rc;
+
+    if ( !d->controller_pause_count )
+    {
+        rc = domain_pause_by_systemcontroller(d);
+        if ( rc )
+            return rc;
+    }
+
+    cd->max_pages = d->max_pages;
+    cd->max_vcpus = d->max_vcpus;
+
+    /* This step is preemptible, so it is the first one to get done. */
+    rc = fork_hap_allocation(d, cd);
+    if ( rc )
+        return rc;
+
+    rc = bring_up_vcpus(cd, d->cpupool);
+    if ( rc )
+        return rc;
+
+    rc = fork_hvm(d, cd);
+    if ( rc )
+        return rc;
+
+    /* Everything is in place: record the parent, making cd a fork. */
+    cd->parent = d;
+
+    return 0;
+}
+
int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
{
int rc;
@@ -1677,6 +1876,35 @@ int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
rc = debug_gref(d, mso.u.debug.u.gref);
break;
+    /*
+     * Make the target domain (d) a fork of the domain named by
+     * mso.u.fork.parent_domain. Memory sharing is enabled on the parent
+     * first if needed. A preempted fork (-ERESTART) is turned into a
+     * hypercall continuation.
+     */
+    case XENMEM_sharing_op_fork:
+    {
+        struct domain *pd;
+
+        /* The padding is reserved and has to be zero. */
+        rc = -EINVAL;
+        if ( mso.u.fork._pad[0] || mso.u.fork._pad[1] ||
+             mso.u.fork._pad[2] )
+            goto out;
+
+        rc = rcu_lock_live_remote_domain_by_id(mso.u.fork.parent_domain,
+                                               &pd);
+        if ( rc )
+            goto out;
+
+        if ( !mem_sharing_enabled(pd) &&
+             (rc = mem_sharing_control(pd, true)) )
+        {
+            /* Don't leak the RCU reference on the parent taken above. */
+            rcu_unlock_domain(pd);
+            goto out;
+        }
+
+        rc = mem_sharing_fork(pd, d);
+
+        if ( rc == -ERESTART )
+            rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
+                                               "lh", XENMEM_sharing_op,
+                                               arg);
+        rcu_unlock_domain(pd);
+        break;
+    }
default:
rc = -ENOSYS;
break;
@@ -508,6 +508,14 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+ /* Check if we need to fork the page */
+ if ( (q & P2M_ALLOC) && p2m_is_hole(*t) &&
+ !mem_sharing_fork_page(p2m->domain, gfn, !!(q & P2M_UNSHARE)) )
+ {
+ mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+ }
+
+ /* Check if we need to unshare the page */
if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
{
ASSERT(p2m_is_hostp2m(p2m));
@@ -586,7 +594,8 @@ struct page_info *p2m_get_page_from_gfn(
return page;
/* Error path: not a suitable GFN at all */
- if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
+ if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) &&
+ !mem_sharing_is_fork(p2m->domain) )
return NULL;
}
@@ -26,8 +26,7 @@
#ifdef CONFIG_MEM_SHARING
-struct mem_sharing_domain
-{
+struct mem_sharing_domain {
bool enabled;
/*
@@ -40,6 +39,9 @@ struct mem_sharing_domain
#define mem_sharing_enabled(d) \
(hap_enabled(d) && (d)->arch.hvm.mem_sharing.enabled)
+#define mem_sharing_is_fork(d) \
+ (mem_sharing_enabled(d) && !!((d)->parent))
+
/* Auditing of memory sharing code? */
#ifndef NDEBUG
#define MEM_SHARING_AUDIT 1
@@ -90,6 +92,9 @@ int mem_sharing_unshare_page(struct domain *d,
return rc;
}
+int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
+ bool unsharing);
+
/*
* If called by a foreign domain, possible errors are
* -EBUSY -> ring full
@@ -119,6 +124,7 @@ int relinquish_shared_pages(struct domain *d);
#else
#define mem_sharing_enabled(d) false
+#define mem_sharing_is_fork(p2m) false
static inline unsigned int mem_sharing_get_nr_saved_mfns(void)
{
@@ -145,6 +151,16 @@ int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
return -EOPNOTSUPP;
}
+/*
+ * Stub for the branch where mem_sharing_enabled() is defined as false:
+ * forking is not supported and always fails with -EOPNOTSUPP.
+ *
+ * NOTE(review): the mem_sharing_fork() implemented elsewhere in this patch
+ * is static and takes only (d, cd) - confirm this stub and its third
+ * parameter are still wanted.
+ */
+static inline int mem_sharing_fork(struct domain *d, struct domain *cd,
+                                   bool vcpu)
+{
+    return -EOPNOTSUPP;
+}
+
+/*
+ * Stub for the branch where mem_sharing_enabled() is defined as false:
+ * there is nothing to fork a page from, so always fail with -EOPNOTSUPP.
+ * The parameters mirror the real mem_sharing_fork_page() prototype.
+ */
+static inline int mem_sharing_fork_page(struct domain *d, gfn_t gfn,
+                                        bool unsharing)
+{
+    return -EOPNOTSUPP;
+}
+
#endif
#endif /* __MEM_SHARING_H__ */
@@ -482,6 +482,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_access_op_t);
#define XENMEM_sharing_op_add_physmap 6
#define XENMEM_sharing_op_audit 7
#define XENMEM_sharing_op_range_share 8
+#define XENMEM_sharing_op_fork 9
#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10)
#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9)
@@ -532,6 +533,10 @@ struct xen_mem_sharing_op {
uint32_t gref; /* IN: gref to debug */
} u;
} debug;
+ struct mem_sharing_op_fork {
+ domid_t parent_domain;
+ uint16_t _pad[3]; /* Must be set to 0 */
+ } fork;
} u;
};
typedef struct xen_mem_sharing_op xen_mem_sharing_op_t;
@@ -501,6 +501,7 @@ struct domain
/* Memory sharing support */
#ifdef CONFIG_MEM_SHARING
struct vm_event_domain *vm_event_share;
+ struct domain *parent; /* VM fork parent */
#endif
/* Memory paging support */
#ifdef CONFIG_HAS_MEM_PAGING
VM forking is the process of creating a domain with an empty memory space and a specified parent domain from which its memory is populated when necessary. For the new domain to be functional, the VM state is copied over as part of the fork operation (HVM params, HAP allocation, etc.). Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com> --- xen/arch/x86/hvm/hvm.c | 2 +- xen/arch/x86/mm/mem_sharing.c | 228 ++++++++++++++++++++++++++++++ xen/arch/x86/mm/p2m.c | 11 +- xen/include/asm-x86/mem_sharing.h | 20 ++- xen/include/public/memory.h | 5 + xen/include/xen/sched.h | 1 + 6 files changed, 263 insertions(+), 4 deletions(-)