diff mbox

[v5,22/23] x86/mm: split out PV mm hypercalls to pv/mm-hypercalls.c

Message ID 20170914125852.22129-23-wei.liu2@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Liu Sept. 14, 2017, 12:58 p.m. UTC
Also move new_guest_cr3 there so that we don't have to export
mod_l1_entry.

Fix coding style issues. Change v to curr, d to currd and u64 to
uint64_t where appropriate.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
I can't convince git diff to produce sensible diff for donate_page and
steal_page. Those functions are not changed.
---
 xen/arch/x86/mm.c               | 1570 ++-------------------------------------
 xen/arch/x86/pv/Makefile        |    1 +
 xen/arch/x86/pv/mm-hypercalls.c | 1461 ++++++++++++++++++++++++++++++++++++
 3 files changed, 1535 insertions(+), 1497 deletions(-)
 create mode 100644 xen/arch/x86/pv/mm-hypercalls.c
diff mbox

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 26e3492597..2ffcc53c6c 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -885,283 +885,6 @@  void page_unlock(struct page_info *page)
     } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 }
 
-/*
- * PTE flags that a guest may change without re-validating the PTE.
- * All other bits affect translation, caching, or Xen's safety.
- */
-#define FASTPATH_FLAG_WHITELIST                                     \
-    (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \
-     _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER)
-
-/* Update the L1 entry at pl1e to new value nl1e. */
-static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
-                        unsigned long gl1mfn, int preserve_ad,
-                        struct vcpu *pt_vcpu, struct domain *pg_dom)
-{
-    l1_pgentry_t ol1e;
-    struct domain *pt_dom = pt_vcpu->domain;
-    int rc = 0;
-
-    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
-        return -EFAULT;
-
-    ASSERT(!paging_mode_refcounts(pt_dom));
-
-    if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
-    {
-        /* Translate foreign guest addresses. */
-        struct page_info *page = NULL;
-
-        if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) )
-        {
-            gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
-                    l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
-            return -EINVAL;
-        }
-
-        if ( paging_mode_translate(pg_dom) )
-        {
-            page = get_page_from_gfn(pg_dom, l1e_get_pfn(nl1e), NULL, P2M_ALLOC);
-            if ( !page )
-                return -EINVAL;
-            nl1e = l1e_from_page(page, l1e_get_flags(nl1e));
-        }
-
-        /* Fast path for sufficiently-similar mappings. */
-        if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) )
-        {
-            nl1e = adjust_guest_l1e(nl1e, pt_dom);
-            rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
-                              preserve_ad);
-            if ( page )
-                put_page(page);
-            return rc ? 0 : -EBUSY;
-        }
-
-        switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
-        {
-        default:
-            if ( page )
-                put_page(page);
-            return rc;
-        case 0:
-            break;
-        case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS:
-            ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS)));
-            l1e_flip_flags(nl1e, rc);
-            rc = 0;
-            break;
-        }
-        if ( page )
-            put_page(page);
-
-        nl1e = adjust_guest_l1e(nl1e, pt_dom);
-        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
-                                    preserve_ad)) )
-        {
-            ol1e = nl1e;
-            rc = -EBUSY;
-        }
-    }
-    else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
-                                     preserve_ad)) )
-    {
-        return -EBUSY;
-    }
-
-    put_page_from_l1e(ol1e, pt_dom);
-    return rc;
-}
-
-
-/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
-static int mod_l2_entry(l2_pgentry_t *pl2e,
-                        l2_pgentry_t nl2e,
-                        unsigned long pfn,
-                        int preserve_ad,
-                        struct vcpu *vcpu)
-{
-    l2_pgentry_t ol2e;
-    struct domain *d = vcpu->domain;
-    struct page_info *l2pg = mfn_to_page(_mfn(pfn));
-    unsigned long type = l2pg->u.inuse.type_info;
-    int rc = 0;
-
-    if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
-    {
-        gdprintk(XENLOG_WARNING, "L2 update in Xen-private area, slot %#lx\n",
-                 pgentry_ptr_to_slot(pl2e));
-        return -EPERM;
-    }
-
-    if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
-        return -EFAULT;
-
-    if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
-    {
-        if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
-        {
-            gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n",
-                    l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
-            return -EINVAL;
-        }
-
-        /* Fast path for sufficiently-similar mappings. */
-        if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) )
-        {
-            nl2e = adjust_guest_l2e(nl2e, d);
-            if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
-                return 0;
-            return -EBUSY;
-        }
-
-        if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) )
-            return rc;
-
-        nl2e = adjust_guest_l2e(nl2e, d);
-        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
-                                    preserve_ad)) )
-        {
-            ol2e = nl2e;
-            rc = -EBUSY;
-        }
-    }
-    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
-                                     preserve_ad)) )
-    {
-        return -EBUSY;
-    }
-
-    put_page_from_l2e(ol2e, pfn);
-    return rc;
-}
-
-/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
-static int mod_l3_entry(l3_pgentry_t *pl3e,
-                        l3_pgentry_t nl3e,
-                        unsigned long pfn,
-                        int preserve_ad,
-                        struct vcpu *vcpu)
-{
-    l3_pgentry_t ol3e;
-    struct domain *d = vcpu->domain;
-    int rc = 0;
-
-    /*
-     * Disallow updates to final L3 slot. It contains Xen mappings, and it
-     * would be a pain to ensure they remain continuously valid throughout.
-     */
-    if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
-        return -EINVAL;
-
-    if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
-        return -EFAULT;
-
-    if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
-    {
-        if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
-        {
-            gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n",
-                    l3e_get_flags(nl3e) & l3_disallow_mask(d));
-            return -EINVAL;
-        }
-
-        /* Fast path for sufficiently-similar mappings. */
-        if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) )
-        {
-            nl3e = adjust_guest_l3e(nl3e, d);
-            rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
-            return rc ? 0 : -EFAULT;
-        }
-
-        rc = get_page_from_l3e(nl3e, pfn, d, 0);
-        if ( unlikely(rc < 0) )
-            return rc;
-        rc = 0;
-
-        nl3e = adjust_guest_l3e(nl3e, d);
-        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
-                                    preserve_ad)) )
-        {
-            ol3e = nl3e;
-            rc = -EFAULT;
-        }
-    }
-    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
-                                     preserve_ad)) )
-    {
-        return -EFAULT;
-    }
-
-    if ( likely(rc == 0) )
-        if ( !create_pae_xen_mappings(d, pl3e) )
-            BUG();
-
-    put_page_from_l3e(ol3e, pfn, 0, 1);
-    return rc;
-}
-
-/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
-static int mod_l4_entry(l4_pgentry_t *pl4e,
-                        l4_pgentry_t nl4e,
-                        unsigned long pfn,
-                        int preserve_ad,
-                        struct vcpu *vcpu)
-{
-    struct domain *d = vcpu->domain;
-    l4_pgentry_t ol4e;
-    int rc = 0;
-
-    if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
-    {
-        gdprintk(XENLOG_WARNING, "L4 update in Xen-private area, slot %#lx\n",
-                 pgentry_ptr_to_slot(pl4e));
-        return -EINVAL;
-    }
-
-    if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
-        return -EFAULT;
-
-    if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
-    {
-        if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
-        {
-            gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n",
-                    l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
-            return -EINVAL;
-        }
-
-        /* Fast path for sufficiently-similar mappings. */
-        if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) )
-        {
-            nl4e = adjust_guest_l4e(nl4e, d);
-            rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
-            return rc ? 0 : -EFAULT;
-        }
-
-        rc = get_page_from_l4e(nl4e, pfn, d, 0);
-        if ( unlikely(rc < 0) )
-            return rc;
-        rc = 0;
-
-        nl4e = adjust_guest_l4e(nl4e, d);
-        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
-                                    preserve_ad)) )
-        {
-            ol4e = nl4e;
-            rc = -EFAULT;
-        }
-    }
-    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
-                                     preserve_ad)) )
-    {
-        return -EFAULT;
-    }
-
-    put_page_from_l4e(ol4e, pfn, 0, 1);
-    return rc;
-}
-
 static int cleanup_page_cacheattr(struct page_info *page)
 {
     unsigned int cacheattr =
@@ -1602,1132 +1325,101 @@  int vcpu_destroy_pagetables(struct vcpu *v)
     return rc != -EINTR ? rc : -ERESTART;
 }
 
-int new_guest_cr3(mfn_t mfn)
+int donate_page(
+    struct domain *d, struct page_info *page, unsigned int memflags)
 {
-    struct vcpu *curr = current;
-    struct domain *d = curr->domain;
-    int rc;
-    mfn_t old_base_mfn;
-
-    if ( is_pv_32bit_domain(d) )
-    {
-        mfn_t gt_mfn = pagetable_get_mfn(curr->arch.guest_table);
-        l4_pgentry_t *pl4e = map_domain_page(gt_mfn);
-
-        rc = mod_l4_entry(pl4e,
-                          l4e_from_mfn(mfn,
-                                       (_PAGE_PRESENT | _PAGE_RW |
-                                        _PAGE_USER | _PAGE_ACCESSED)),
-                          mfn_x(gt_mfn), 0, curr);
-        unmap_domain_page(pl4e);
-        switch ( rc )
-        {
-        case 0:
-            break;
-        case -EINTR:
-        case -ERESTART:
-            return -ERESTART;
-        default:
-            gdprintk(XENLOG_WARNING,
-                     "Error while installing new compat baseptr %" PRI_mfn "\n",
-                     mfn_x(mfn));
-            return rc;
-        }
+    const struct domain *owner = dom_xen;
 
-        pv_invalidate_shadow_ldt(curr, 0);
-        write_ptbase(curr);
+    spin_lock(&d->page_alloc_lock);
 
-        return 0;
-    }
+    if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != NULL) )
+        goto fail;
 
-    rc = put_old_guest_table(curr);
-    if ( unlikely(rc) )
-        return rc;
+    if ( d->is_dying )
+        goto fail;
 
-    old_base_mfn = pagetable_get_mfn(curr->arch.guest_table);
-    /*
-     * This is particularly important when getting restarted after the
-     * previous attempt got preempted in the put-old-MFN phase.
-     */
-    if ( mfn_eq(old_base_mfn, mfn) )
-    {
-        write_ptbase(curr);
-        return 0;
-    }
+    if ( page->count_info & ~(PGC_allocated | 1) )
+        goto fail;
 
-    rc = paging_mode_refcounts(d)
-         ? (get_page_from_mfn(mfn, d) ? 0 : -EINVAL)
-         : get_page_and_type_from_mfn(mfn, PGT_root_page_table, d, 0, 1);
-    switch ( rc )
+    if ( !(memflags & MEMF_no_refcount) )
     {
-    case 0:
-        break;
-    case -EINTR:
-    case -ERESTART:
-        return -ERESTART;
-    default:
-        gdprintk(XENLOG_WARNING,
-                 "Error while installing new baseptr %" PRI_mfn "\n",
-                 mfn_x(mfn));
-        return rc;
+        if ( d->tot_pages >= d->max_pages )
+            goto fail;
+        domain_adjust_tot_pages(d, 1);
     }
 
-    pv_invalidate_shadow_ldt(curr, 0);
-
-    if ( !VM_ASSIST(d, m2p_strict) && !paging_mode_refcounts(d) )
-        fill_ro_mpt(mfn);
-    curr->arch.guest_table = pagetable_from_mfn(mfn);
-    update_cr3(curr);
-
-    write_ptbase(curr);
-
-    if ( likely(mfn_x(old_base_mfn) != 0) )
-    {
-        struct page_info *page = mfn_to_page(old_base_mfn);
+    page->count_info = PGC_allocated | 1;
+    page_set_owner(page, d);
+    page_list_add_tail(page,&d->page_list);
 
-        if ( paging_mode_refcounts(d) )
-            put_page(page);
-        else
-            switch ( rc = put_page_and_type_preemptible(page) )
-            {
-            case -EINTR:
-                rc = -ERESTART;
-                /* fallthrough */
-            case -ERESTART:
-                curr->arch.old_guest_table = page;
-                break;
-            default:
-                BUG_ON(rc);
-                break;
-            }
-    }
+    spin_unlock(&d->page_alloc_lock);
+    return 0;
 
-    return rc;
+ fail:
+    spin_unlock(&d->page_alloc_lock);
+    gdprintk(XENLOG_WARNING, "Bad donate mfn %" PRI_mfn
+             " to d%d (owner d%d) caf=%08lx taf=%" PRtype_info "\n",
+             mfn_x(page_to_mfn(page)), d->domain_id,
+             owner ? owner->domain_id : DOMID_INVALID,
+             page->count_info, page->u.inuse.type_info);
+    return -EINVAL;
 }
 
-static struct domain *get_pg_owner(domid_t domid)
+int steal_page(
+    struct domain *d, struct page_info *page, unsigned int memflags)
 {
-    struct domain *pg_owner = NULL, *curr = current->domain;
+    unsigned long x, y;
+    bool drop_dom_ref = false;
+    const struct domain *owner = dom_xen;
 
-    if ( likely(domid == DOMID_SELF) )
-    {
-        pg_owner = rcu_lock_current_domain();
-        goto out;
-    }
+    if ( paging_mode_external(d) )
+        return -EOPNOTSUPP;
 
-    if ( unlikely(domid == curr->domain_id) )
-    {
-        gdprintk(XENLOG_WARNING, "Cannot specify itself as foreign domain\n");
-        goto out;
-    }
+    spin_lock(&d->page_alloc_lock);
 
-    switch ( domid )
-    {
-    case DOMID_IO:
-        pg_owner = rcu_lock_domain(dom_io);
-        break;
-    case DOMID_XEN:
-        pg_owner = rcu_lock_domain(dom_xen);
-        break;
-    default:
-        if ( (pg_owner = rcu_lock_domain_by_id(domid)) == NULL )
-        {
-            gdprintk(XENLOG_WARNING, "Unknown domain d%d\n", domid);
-            break;
-        }
-        break;
-    }
+    if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != d) )
+        goto fail;
 
- out:
-    return pg_owner;
-}
+    /*
+     * We require there is just one reference (PGC_allocated). We temporarily
+     * drop this reference now so that we can safely swizzle the owner.
+     */
+    y = page->count_info;
+    do {
+        x = y;
+        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
+            goto fail;
+        y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
+    } while ( y != x );
 
-static void put_pg_owner(struct domain *pg_owner)
-{
-    rcu_unlock_domain(pg_owner);
-}
+    /*
+     * With the sole reference dropped temporarily, no-one can update type
+     * information. Type count also needs to be zero in this case, but e.g.
+     * PGT_seg_desc_page may still have PGT_validated set, which we need to
+     * clear before transferring ownership (as validation criteria vary
+     * depending on domain type).
+     */
+    BUG_ON(page->u.inuse.type_info & (PGT_count_mask | PGT_locked |
+                                      PGT_pinned));
+    page->u.inuse.type_info = 0;
 
-static inline int vcpumask_to_pcpumask(
-    struct domain *d, XEN_GUEST_HANDLE_PARAM(const_void) bmap, cpumask_t *pmask)
-{
-    unsigned int vcpu_id, vcpu_bias, offs;
-    unsigned long vmask;
-    struct vcpu *v;
-    bool is_native = !is_pv_32bit_domain(d);
+    /* Swizzle the owner then reinstate the PGC_allocated reference. */
+    page_set_owner(page, NULL);
+    y = page->count_info;
+    do {
+        x = y;
+        BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
+    } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
 
-    cpumask_clear(pmask);
-    for ( vmask = 0, offs = 0; ; ++offs )
-    {
-        vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32);
-        if ( vcpu_bias >= d->max_vcpus )
-            return 0;
+    /* Unlink from original owner. */
+    if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) )
+        drop_dom_ref = true;
+    page_list_del(page, &d->page_list);
 
-        if ( unlikely(is_native ?
-                      copy_from_guest_offset(&vmask, bmap, offs, 1) :
-                      copy_from_guest_offset((unsigned int *)&vmask, bmap,
-                                             offs, 1)) )
-        {
-            cpumask_clear(pmask);
-            return -EFAULT;
-        }
-
-        while ( vmask )
-        {
-            vcpu_id = find_first_set_bit(vmask);
-            vmask &= ~(1UL << vcpu_id);
-            vcpu_id += vcpu_bias;
-            if ( (vcpu_id >= d->max_vcpus) )
-                return 0;
-            if ( ((v = d->vcpu[vcpu_id]) != NULL) )
-                cpumask_or(pmask, pmask, v->vcpu_dirty_cpumask);
-        }
-    }
-}
-
-long do_mmuext_op(
-    XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops,
-    unsigned int count,
-    XEN_GUEST_HANDLE_PARAM(uint) pdone,
-    unsigned int foreigndom)
-{
-    struct mmuext_op op;
-    unsigned long type;
-    unsigned int i, done = 0;
-    struct vcpu *curr = current;
-    struct domain *currd = curr->domain;
-    struct domain *pg_owner;
-    int rc = put_old_guest_table(curr);
-
-    if ( unlikely(rc) )
-    {
-        if ( likely(rc == -ERESTART) )
-            rc = hypercall_create_continuation(
-                     __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
-                     foreigndom);
-        return rc;
-    }
-
-    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
-         likely(guest_handle_is_null(uops)) )
-    {
-        /*
-         * See the curr->arch.old_guest_table related
-         * hypercall_create_continuation() below.
-         */
-        return (int)foreigndom;
-    }
-
-    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
-    {
-        count &= ~MMU_UPDATE_PREEMPTED;
-        if ( unlikely(!guest_handle_is_null(pdone)) )
-            (void)copy_from_guest(&done, pdone, 1);
-    }
-    else
-        perfc_incr(calls_to_mmuext_op);
-
-    if ( unlikely(!guest_handle_okay(uops, count)) )
-        return -EFAULT;
-
-    if ( (pg_owner = get_pg_owner(foreigndom)) == NULL )
-        return -ESRCH;
-
-    if ( !is_pv_domain(pg_owner) )
-    {
-        put_pg_owner(pg_owner);
-        return -EINVAL;
-    }
-
-    rc = xsm_mmuext_op(XSM_TARGET, currd, pg_owner);
-    if ( rc )
-    {
-        put_pg_owner(pg_owner);
-        return rc;
-    }
-
-    for ( i = 0; i < count; i++ )
-    {
-        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
-        {
-            rc = -ERESTART;
-            break;
-        }
-
-        if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
-        {
-            rc = -EFAULT;
-            break;
-        }
-
-        if ( is_hvm_domain(currd) )
-        {
-            switch ( op.cmd )
-            {
-            case MMUEXT_PIN_L1_TABLE:
-            case MMUEXT_PIN_L2_TABLE:
-            case MMUEXT_PIN_L3_TABLE:
-            case MMUEXT_PIN_L4_TABLE:
-            case MMUEXT_UNPIN_TABLE:
-                break;
-            default:
-                rc = -EOPNOTSUPP;
-                goto done;
-            }
-        }
-
-        rc = 0;
-
-        switch ( op.cmd )
-        {
-            struct page_info *page;
-            p2m_type_t p2mt;
-
-        case MMUEXT_PIN_L1_TABLE:
-            type = PGT_l1_page_table;
-            goto pin_page;
-
-        case MMUEXT_PIN_L2_TABLE:
-            type = PGT_l2_page_table;
-            goto pin_page;
-
-        case MMUEXT_PIN_L3_TABLE:
-            type = PGT_l3_page_table;
-            goto pin_page;
-
-        case MMUEXT_PIN_L4_TABLE:
-            if ( is_pv_32bit_domain(pg_owner) )
-                break;
-            type = PGT_l4_page_table;
-
-        pin_page:
-            /* Ignore pinning of invalid paging levels. */
-            if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
-                break;
-
-            if ( paging_mode_refcounts(pg_owner) )
-                break;
-
-            page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
-            if ( unlikely(!page) )
-            {
-                rc = -EINVAL;
-                break;
-            }
-
-            rc = get_page_type_preemptible(page, type);
-            if ( unlikely(rc) )
-            {
-                if ( rc == -EINTR )
-                    rc = -ERESTART;
-                else if ( rc != -ERESTART )
-                    gdprintk(XENLOG_WARNING,
-                             "Error %d while pinning mfn %" PRI_mfn "\n",
-                             rc, mfn_x(page_to_mfn(page)));
-                if ( page != curr->arch.old_guest_table )
-                    put_page(page);
-                break;
-            }
-
-            rc = xsm_memory_pin_page(XSM_HOOK, currd, pg_owner, page);
-            if ( !rc && unlikely(test_and_set_bit(_PGT_pinned,
-                                                  &page->u.inuse.type_info)) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "mfn %" PRI_mfn " already pinned\n",
-                         mfn_x(page_to_mfn(page)));
-                rc = -EINVAL;
-            }
-
-            if ( unlikely(rc) )
-                goto pin_drop;
-
-            /* A page is dirtied when its pin status is set. */
-            paging_mark_dirty(pg_owner, page_to_mfn(page));
-
-            /* We can race domain destruction (domain_relinquish_resources). */
-            if ( unlikely(pg_owner != currd) )
-            {
-                bool drop_ref;
-
-                spin_lock(&pg_owner->page_alloc_lock);
-                drop_ref = (pg_owner->is_dying &&
-                            test_and_clear_bit(_PGT_pinned,
-                                               &page->u.inuse.type_info));
-                spin_unlock(&pg_owner->page_alloc_lock);
-                if ( drop_ref )
-                {
-        pin_drop:
-                    if ( type == PGT_l1_page_table )
-                        put_page_and_type(page);
-                    else
-                        curr->arch.old_guest_table = page;
-                }
-            }
-            break;
-
-        case MMUEXT_UNPIN_TABLE:
-            if ( paging_mode_refcounts(pg_owner) )
-                break;
-
-            page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
-            if ( unlikely(!page) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "mfn %" PRI_mfn " bad, or bad owner d%d\n",
-                         op.arg1.mfn, pg_owner->domain_id);
-                rc = -EINVAL;
-                break;
-            }
-
-            if ( !test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
-            {
-                put_page(page);
-                gdprintk(XENLOG_WARNING,
-                         "mfn %" PRI_mfn " not pinned\n", op.arg1.mfn);
-                rc = -EINVAL;
-                break;
-            }
-
-            switch ( rc = put_page_and_type_preemptible(page) )
-            {
-            case -EINTR:
-            case -ERESTART:
-                curr->arch.old_guest_table = page;
-                rc = 0;
-                break;
-            default:
-                BUG_ON(rc);
-                break;
-            }
-            put_page(page);
-
-            /* A page is dirtied when its pin status is cleared. */
-            paging_mark_dirty(pg_owner, page_to_mfn(page));
-            break;
-
-        case MMUEXT_NEW_BASEPTR:
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( unlikely(paging_mode_translate(currd)) )
-                rc = -EINVAL;
-            else
-                rc = new_guest_cr3(_mfn(op.arg1.mfn));
-            break;
-
-        case MMUEXT_NEW_USER_BASEPTR: {
-            unsigned long old_mfn;
-
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( unlikely(paging_mode_translate(currd)) )
-                rc = -EINVAL;
-            if ( unlikely(rc) )
-                break;
-
-            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
-            /*
-             * This is particularly important when getting restarted after the
-             * previous attempt got preempted in the put-old-MFN phase.
-             */
-            if ( old_mfn == op.arg1.mfn )
-                break;
-
-            if ( op.arg1.mfn != 0 )
-            {
-                rc = get_page_and_type_from_mfn(
-                    _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1);
-
-                if ( unlikely(rc) )
-                {
-                    if ( rc == -EINTR )
-                        rc = -ERESTART;
-                    else if ( rc != -ERESTART )
-                        gdprintk(XENLOG_WARNING,
-                                 "Error %d installing new mfn %" PRI_mfn "\n",
-                                 rc, op.arg1.mfn);
-                    break;
-                }
-
-                if ( VM_ASSIST(currd, m2p_strict) )
-                    zap_ro_mpt(_mfn(op.arg1.mfn));
-            }
-
-            curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
-
-            if ( old_mfn != 0 )
-            {
-                page = mfn_to_page(_mfn(old_mfn));
-
-                switch ( rc = put_page_and_type_preemptible(page) )
-                {
-                case -EINTR:
-                    rc = -ERESTART;
-                    /* fallthrough */
-                case -ERESTART:
-                    curr->arch.old_guest_table = page;
-                    break;
-                default:
-                    BUG_ON(rc);
-                    break;
-                }
-            }
-
-            break;
-        }
-
-        case MMUEXT_TLB_FLUSH_LOCAL:
-            if ( likely(currd == pg_owner) )
-                flush_tlb_local();
-            else
-                rc = -EPERM;
-            break;
-
-        case MMUEXT_INVLPG_LOCAL:
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else
-                paging_invlpg(curr, op.arg1.linear_addr);
-            break;
-
-        case MMUEXT_TLB_FLUSH_MULTI:
-        case MMUEXT_INVLPG_MULTI:
-        {
-            cpumask_t *mask = this_cpu(scratch_cpumask);
-
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( unlikely(vcpumask_to_pcpumask(currd,
-                                   guest_handle_to_param(op.arg2.vcpumask,
-                                                         const_void),
-                                   mask)) )
-                rc = -EINVAL;
-            if ( unlikely(rc) )
-                break;
-
-            if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
-                flush_tlb_mask(mask);
-            else if ( __addr_ok(op.arg1.linear_addr) )
-                flush_tlb_one_mask(mask, op.arg1.linear_addr);
-            break;
-        }
-
-        case MMUEXT_TLB_FLUSH_ALL:
-            if ( likely(currd == pg_owner) )
-                flush_tlb_mask(currd->domain_dirty_cpumask);
-            else
-                rc = -EPERM;
-            break;
-
-        case MMUEXT_INVLPG_ALL:
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( __addr_ok(op.arg1.linear_addr) )
-                flush_tlb_one_mask(currd->domain_dirty_cpumask,
-                                   op.arg1.linear_addr);
-            break;
-
-        case MMUEXT_FLUSH_CACHE:
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( unlikely(!cache_flush_permitted(currd)) )
-                rc = -EACCES;
-            else
-                wbinvd();
-            break;
-
-        case MMUEXT_FLUSH_CACHE_GLOBAL:
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( likely(cache_flush_permitted(currd)) )
-            {
-                unsigned int cpu;
-                cpumask_t *mask = this_cpu(scratch_cpumask);
-
-                cpumask_clear(mask);
-                for_each_online_cpu(cpu)
-                    if ( !cpumask_intersects(mask,
-                                             per_cpu(cpu_sibling_mask, cpu)) )
-                        __cpumask_set_cpu(cpu, mask);
-                flush_mask(mask, FLUSH_CACHE);
-            }
-            else
-                rc = -EINVAL;
-            break;
-
-        case MMUEXT_SET_LDT:
-        {
-            unsigned int ents = op.arg2.nr_ents;
-            unsigned long ptr = ents ? op.arg1.linear_addr : 0;
-
-            if ( unlikely(currd != pg_owner) )
-                rc = -EPERM;
-            else if ( paging_mode_external(currd) )
-                rc = -EINVAL;
-            else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
-                      (ents > 8192) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Bad args to SET_LDT: ptr=%lx, ents=%x\n", ptr, ents);
-                rc = -EINVAL;
-            }
-            else if ( (curr->arch.pv_vcpu.ldt_ents != ents) ||
-                      (curr->arch.pv_vcpu.ldt_base != ptr) )
-            {
-                pv_invalidate_shadow_ldt(curr, 0);
-                flush_tlb_local();
-                curr->arch.pv_vcpu.ldt_base = ptr;
-                curr->arch.pv_vcpu.ldt_ents = ents;
-                load_LDT(curr);
-            }
-            break;
-        }
-
-        case MMUEXT_CLEAR_PAGE:
-            page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, P2M_ALLOC);
-            if ( unlikely(p2mt != p2m_ram_rw) && page )
-            {
-                put_page(page);
-                page = NULL;
-            }
-            if ( !page || !get_page_type(page, PGT_writable_page) )
-            {
-                if ( page )
-                    put_page(page);
-                gdprintk(XENLOG_WARNING,
-                         "Error clearing mfn %" PRI_mfn "\n", op.arg1.mfn);
-                rc = -EINVAL;
-                break;
-            }
-
-            /* A page is dirtied when it's being cleared. */
-            paging_mark_dirty(pg_owner, page_to_mfn(page));
-
-            clear_domain_page(page_to_mfn(page));
-
-            put_page_and_type(page);
-            break;
-
-        case MMUEXT_COPY_PAGE:
-        {
-            struct page_info *src_page, *dst_page;
-
-            src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, &p2mt,
-                                         P2M_ALLOC);
-            if ( unlikely(p2mt != p2m_ram_rw) && src_page )
-            {
-                put_page(src_page);
-                src_page = NULL;
-            }
-            if ( unlikely(!src_page) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Error copying from mfn %" PRI_mfn "\n",
-                         op.arg2.src_mfn);
-                rc = -EINVAL;
-                break;
-            }
-
-            dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt,
-                                         P2M_ALLOC);
-            if ( unlikely(p2mt != p2m_ram_rw) && dst_page )
-            {
-                put_page(dst_page);
-                dst_page = NULL;
-            }
-            rc = (dst_page &&
-                  get_page_type(dst_page, PGT_writable_page)) ? 0 : -EINVAL;
-            if ( unlikely(rc) )
-            {
-                put_page(src_page);
-                if ( dst_page )
-                    put_page(dst_page);
-                gdprintk(XENLOG_WARNING,
-                         "Error copying to mfn %" PRI_mfn "\n", op.arg1.mfn);
-                break;
-            }
-
-            /* A page is dirtied when it's being copied to. */
-            paging_mark_dirty(pg_owner, page_to_mfn(dst_page));
-
-            copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page));
-
-            put_page_and_type(dst_page);
-            put_page(src_page);
-            break;
-        }
-
-        case MMUEXT_MARK_SUPER:
-        case MMUEXT_UNMARK_SUPER:
-            rc = -EOPNOTSUPP;
-            break;
-
-        default:
-            rc = -ENOSYS;
-            break;
-        }
-
- done:
-        if ( unlikely(rc) )
-            break;
-
-        guest_handle_add_offset(uops, 1);
-    }
-
-    if ( rc == -ERESTART )
-    {
-        ASSERT(i < count);
-        rc = hypercall_create_continuation(
-            __HYPERVISOR_mmuext_op, "hihi",
-            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
-    }
-    else if ( curr->arch.old_guest_table )
-    {
-        XEN_GUEST_HANDLE_PARAM(void) null;
-
-        ASSERT(rc || i == count);
-        set_xen_guest_handle(null, NULL);
-        /*
-         * In order to have a way to communicate the final return value to
-         * our continuation, we pass this in place of "foreigndom", building
-         * on the fact that this argument isn't needed anymore.
-         */
-        rc = hypercall_create_continuation(
-                __HYPERVISOR_mmuext_op, "hihi", null,
-                MMU_UPDATE_PREEMPTED, null, rc);
-    }
-
-    put_pg_owner(pg_owner);
-
-    perfc_add(num_mmuext_ops, i);
-
-    /* Add incremental work we have done to the @done output parameter. */
-    if ( unlikely(!guest_handle_is_null(pdone)) )
-    {
-        done += i;
-        copy_to_guest(pdone, &done, 1);
-    }
-
-    return rc;
-}
-
-long do_mmu_update(
-    XEN_GUEST_HANDLE_PARAM(mmu_update_t) ureqs,
-    unsigned int count,
-    XEN_GUEST_HANDLE_PARAM(uint) pdone,
-    unsigned int foreigndom)
-{
-    struct mmu_update req;
-    void *va = NULL;
-    unsigned long gpfn, gmfn, mfn;
-    struct page_info *page;
-    unsigned int cmd, i = 0, done = 0, pt_dom;
-    struct vcpu *curr = current, *v = curr;
-    struct domain *d = v->domain, *pt_owner = d, *pg_owner;
-    mfn_t map_mfn = INVALID_MFN;
-    uint32_t xsm_needed = 0;
-    uint32_t xsm_checked = 0;
-    int rc = put_old_guest_table(curr);
-
-    if ( unlikely(rc) )
-    {
-        if ( likely(rc == -ERESTART) )
-            rc = hypercall_create_continuation(
-                     __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
-                     foreigndom);
-        return rc;
-    }
-
-    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
-         likely(guest_handle_is_null(ureqs)) )
-    {
-        /*
-         * See the curr->arch.old_guest_table related
-         * hypercall_create_continuation() below.
-         */
-        return (int)foreigndom;
-    }
-
-    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
-    {
-        count &= ~MMU_UPDATE_PREEMPTED;
-        if ( unlikely(!guest_handle_is_null(pdone)) )
-            (void)copy_from_guest(&done, pdone, 1);
-    }
-    else
-        perfc_incr(calls_to_mmu_update);
-
-    if ( unlikely(!guest_handle_okay(ureqs, count)) )
-        return -EFAULT;
-
-    if ( (pt_dom = foreigndom >> 16) != 0 )
-    {
-        /* Pagetables belong to a foreign domain (PFD). */
-        if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL )
-            return -ESRCH;
-
-        if ( pt_owner == d )
-            rcu_unlock_domain(pt_owner);
-        else if ( !pt_owner->vcpu || (v = pt_owner->vcpu[0]) == NULL )
-        {
-            rc = -EINVAL;
-            goto out;
-        }
-    }
-
-    if ( (pg_owner = get_pg_owner((uint16_t)foreigndom)) == NULL )
-    {
-        rc = -ESRCH;
-        goto out;
-    }
-
-    for ( i = 0; i < count; i++ )
-    {
-        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
-        {
-            rc = -ERESTART;
-            break;
-        }
-
-        if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
-        {
-            rc = -EFAULT;
-            break;
-        }
-
-        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
-
-        switch ( cmd )
-        {
-            /*
-             * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
-             * MMU_UPDATE_PT_PRESERVE_AD: As above but also preserve (OR)
-             * current A/D bits.
-             */
-        case MMU_NORMAL_PT_UPDATE:
-        case MMU_PT_UPDATE_PRESERVE_AD:
-        {
-            p2m_type_t p2mt;
-
-            rc = -EOPNOTSUPP;
-            if ( unlikely(paging_mode_refcounts(pt_owner)) )
-                break;
-
-            xsm_needed |= XSM_MMU_NORMAL_UPDATE;
-            if ( get_pte_flags(req.val) & _PAGE_PRESENT )
-            {
-                xsm_needed |= XSM_MMU_UPDATE_READ;
-                if ( get_pte_flags(req.val) & _PAGE_RW )
-                    xsm_needed |= XSM_MMU_UPDATE_WRITE;
-            }
-            if ( xsm_needed != xsm_checked )
-            {
-                rc = xsm_mmu_update(XSM_TARGET, d, pt_owner, pg_owner, xsm_needed);
-                if ( rc )
-                    break;
-                xsm_checked = xsm_needed;
-            }
-            rc = -EINVAL;
-
-            req.ptr -= cmd;
-            gmfn = req.ptr >> PAGE_SHIFT;
-            page = get_page_from_gfn(pt_owner, gmfn, &p2mt, P2M_ALLOC);
-
-            if ( p2m_is_paged(p2mt) )
-            {
-                ASSERT(!page);
-                p2m_mem_paging_populate(pg_owner, gmfn);
-                rc = -ENOENT;
-                break;
-            }
-
-            if ( unlikely(!page) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Could not get page for normal update\n");
-                break;
-            }
-
-            mfn = mfn_x(page_to_mfn(page));
-
-            if ( !mfn_eq(_mfn(mfn), map_mfn) )
-            {
-                if ( va )
-                    unmap_domain_page(va);
-                va = map_domain_page(_mfn(mfn));
-                map_mfn = _mfn(mfn);
-            }
-            va = _p(((unsigned long)va & PAGE_MASK) + (req.ptr & ~PAGE_MASK));
-
-            if ( page_lock(page) )
-            {
-                switch ( page->u.inuse.type_info & PGT_type_mask )
-                {
-                case PGT_l1_page_table:
-                {
-                    l1_pgentry_t l1e = l1e_from_intpte(req.val);
-                    p2m_type_t l1e_p2mt = p2m_ram_rw;
-                    struct page_info *target = NULL;
-                    p2m_query_t q = (l1e_get_flags(l1e) & _PAGE_RW) ?
-                                        P2M_UNSHARE : P2M_ALLOC;
-
-                    if ( paging_mode_translate(pg_owner) )
-                        target = get_page_from_gfn(pg_owner, l1e_get_pfn(l1e),
-                                                   &l1e_p2mt, q);
-
-                    if ( p2m_is_paged(l1e_p2mt) )
-                    {
-                        if ( target )
-                            put_page(target);
-                        p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
-                        rc = -ENOENT;
-                        break;
-                    }
-                    else if ( p2m_ram_paging_in == l1e_p2mt && !target )
-                    {
-                        rc = -ENOENT;
-                        break;
-                    }
-                    /* If we tried to unshare and failed */
-                    else if ( (q & P2M_UNSHARE) && p2m_is_shared(l1e_p2mt) )
-                    {
-                        /* We could not have obtained a page ref. */
-                        ASSERT(target == NULL);
-                        /* And mem_sharing_notify has already been called. */
-                        rc = -ENOMEM;
-                        break;
-                    }
-
-                    rc = mod_l1_entry(va, l1e, mfn,
-                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
-                                      pg_owner);
-                    if ( target )
-                        put_page(target);
-                }
-                break;
-                case PGT_l2_page_table:
-                    rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn,
-                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                    break;
-                case PGT_l3_page_table:
-                    rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn,
-                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                    break;
-                case PGT_l4_page_table:
-                    rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
-                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
-                break;
-                case PGT_writable_page:
-                    perfc_incr(writable_mmu_updates);
-                    if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
-                        rc = 0;
-                    break;
-                }
-                page_unlock(page);
-                if ( rc == -EINTR )
-                    rc = -ERESTART;
-            }
-            else if ( get_page_type(page, PGT_writable_page) )
-            {
-                perfc_incr(writable_mmu_updates);
-                if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
-                    rc = 0;
-                put_page_type(page);
-            }
-
-            put_page(page);
-        }
-        break;
-
-        case MMU_MACHPHYS_UPDATE:
-            if ( unlikely(d != pt_owner) )
-            {
-                rc = -EPERM;
-                break;
-            }
-
-            if ( unlikely(paging_mode_translate(pg_owner)) )
-            {
-                rc = -EINVAL;
-                break;
-            }
-
-            mfn = req.ptr >> PAGE_SHIFT;
-            gpfn = req.val;
-
-            xsm_needed |= XSM_MMU_MACHPHYS_UPDATE;
-            if ( xsm_needed != xsm_checked )
-            {
-                rc = xsm_mmu_update(XSM_TARGET, d, NULL, pg_owner, xsm_needed);
-                if ( rc )
-                    break;
-                xsm_checked = xsm_needed;
-            }
-
-            if ( unlikely(!get_page_from_mfn(_mfn(mfn), pg_owner)) )
-            {
-                gdprintk(XENLOG_WARNING,
-                         "Could not get page for mach->phys update\n");
-                rc = -EINVAL;
-                break;
-            }
-
-            set_gpfn_from_mfn(mfn, gpfn);
-
-            paging_mark_dirty(pg_owner, _mfn(mfn));
-
-            put_page(mfn_to_page(_mfn(mfn)));
-            break;
-
-        default:
-            rc = -ENOSYS;
-            break;
-        }
-
-        if ( unlikely(rc) )
-            break;
-
-        guest_handle_add_offset(ureqs, 1);
-    }
-
-    if ( rc == -ERESTART )
-    {
-        ASSERT(i < count);
-        rc = hypercall_create_continuation(
-            __HYPERVISOR_mmu_update, "hihi",
-            ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
-    }
-    else if ( curr->arch.old_guest_table )
-    {
-        XEN_GUEST_HANDLE_PARAM(void) null;
-
-        ASSERT(rc || i == count);
-        set_xen_guest_handle(null, NULL);
-        /*
-         * In order to have a way to communicate the final return value to
-         * our continuation, we pass this in place of "foreigndom", building
-         * on the fact that this argument isn't needed anymore.
-         */
-        rc = hypercall_create_continuation(
-                __HYPERVISOR_mmu_update, "hihi", null,
-                MMU_UPDATE_PREEMPTED, null, rc);
-    }
-
-    put_pg_owner(pg_owner);
-
-    if ( va )
-        unmap_domain_page(va);
-
-    perfc_add(num_page_updates, i);
-
- out:
-    if ( pt_owner != d )
-        rcu_unlock_domain(pt_owner);
-
-    /* Add incremental work we have done to the @done output parameter. */
-    if ( unlikely(!guest_handle_is_null(pdone)) )
-    {
-        done += i;
-        copy_to_guest(pdone, &done, 1);
-    }
-
-    return rc;
-}
-
-int donate_page(
-    struct domain *d, struct page_info *page, unsigned int memflags)
-{
-    const struct domain *owner = dom_xen;
-
-    spin_lock(&d->page_alloc_lock);
-
-    if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != NULL) )
-        goto fail;
-
-    if ( d->is_dying )
-        goto fail;
-
-    if ( page->count_info & ~(PGC_allocated | 1) )
-        goto fail;
-
-    if ( !(memflags & MEMF_no_refcount) )
-    {
-        if ( d->tot_pages >= d->max_pages )
-            goto fail;
-        domain_adjust_tot_pages(d, 1);
-    }
-
-    page->count_info = PGC_allocated | 1;
-    page_set_owner(page, d);
-    page_list_add_tail(page,&d->page_list);
-
-    spin_unlock(&d->page_alloc_lock);
-    return 0;
-
- fail:
-    spin_unlock(&d->page_alloc_lock);
-    gdprintk(XENLOG_WARNING, "Bad donate mfn %" PRI_mfn
-             " to d%d (owner d%d) caf=%08lx taf=%" PRtype_info "\n",
-             mfn_x(page_to_mfn(page)), d->domain_id,
-             owner ? owner->domain_id : DOMID_INVALID,
-             page->count_info, page->u.inuse.type_info);
-    return -EINVAL;
-}
-
-int steal_page(
-    struct domain *d, struct page_info *page, unsigned int memflags)
-{
-    unsigned long x, y;
-    bool drop_dom_ref = false;
-    const struct domain *owner = dom_xen;
-
-    if ( paging_mode_external(d) )
-        return -EOPNOTSUPP;
-
-    spin_lock(&d->page_alloc_lock);
-
-    if ( is_xen_heap_page(page) || ((owner = page_get_owner(page)) != d) )
-        goto fail;
-
-    /*
-     * We require there is just one reference (PGC_allocated). We temporarily
-     * drop this reference now so that we can safely swizzle the owner.
-     */
-    y = page->count_info;
-    do {
-        x = y;
-        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
-            goto fail;
-        y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
-    } while ( y != x );
-
-    /*
-     * With the sole reference dropped temporarily, no-one can update type
-     * information. Type count also needs to be zero in this case, but e.g.
-     * PGT_seg_desc_page may still have PGT_validated set, which we need to
-     * clear before transferring ownership (as validation criteria vary
-     * depending on domain type).
-     */
-    BUG_ON(page->u.inuse.type_info & (PGT_count_mask | PGT_locked |
-                                      PGT_pinned));
-    page->u.inuse.type_info = 0;
-
-    /* Swizzle the owner then reinstate the PGC_allocated reference. */
-    page_set_owner(page, NULL);
-    y = page->count_info;
-    do {
-        x = y;
-        BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
-    } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
-
-    /* Unlink from original owner. */
-    if ( !(memflags & MEMF_no_refcount) && !domain_adjust_tot_pages(d, -1) )
-        drop_dom_ref = true;
-    page_list_del(page, &d->page_list);
-
-    spin_unlock(&d->page_alloc_lock);
-    if ( unlikely(drop_dom_ref) )
-        put_domain(d);
-    return 0;
+    spin_unlock(&d->page_alloc_lock);
+    if ( unlikely(drop_dom_ref) )
+        put_domain(d);
+    return 0;
 
  fail:
     spin_unlock(&d->page_alloc_lock);
@@ -2739,122 +1431,6 @@  int steal_page(
     return -EINVAL;
 }
 
-static int __do_update_va_mapping(
-    unsigned long va, u64 val64, unsigned long flags, struct domain *pg_owner)
-{
-    l1_pgentry_t   val = l1e_from_intpte(val64);
-    struct vcpu   *v   = current;
-    struct domain *d   = v->domain;
-    struct page_info *gl1pg;
-    l1_pgentry_t  *pl1e;
-    unsigned long  bmap_ptr;
-    mfn_t          gl1mfn;
-    cpumask_t     *mask = NULL;
-    int            rc;
-
-    perfc_incr(calls_to_update_va);
-
-    rc = xsm_update_va_mapping(XSM_TARGET, d, pg_owner, val);
-    if ( rc )
-        return rc;
-
-    rc = -EINVAL;
-    pl1e = map_guest_l1e(va, &gl1mfn);
-    if ( unlikely(!pl1e || !get_page_from_mfn(gl1mfn, d)) )
-        goto out;
-
-    gl1pg = mfn_to_page(gl1mfn);
-    if ( !page_lock(gl1pg) )
-    {
-        put_page(gl1pg);
-        goto out;
-    }
-
-    if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
-    {
-        page_unlock(gl1pg);
-        put_page(gl1pg);
-        goto out;
-    }
-
-    rc = mod_l1_entry(pl1e, val, mfn_x(gl1mfn), 0, v, pg_owner);
-
-    page_unlock(gl1pg);
-    put_page(gl1pg);
-
- out:
-    if ( pl1e )
-        unmap_domain_page(pl1e);
-
-    switch ( flags & UVMF_FLUSHTYPE_MASK )
-    {
-    case UVMF_TLB_FLUSH:
-        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
-        {
-        case UVMF_LOCAL:
-            flush_tlb_local();
-            break;
-        case UVMF_ALL:
-            mask = d->domain_dirty_cpumask;
-            break;
-        default:
-            mask = this_cpu(scratch_cpumask);
-            rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr,
-                                                                     void),
-                                      mask);
-            break;
-        }
-        if ( mask )
-            flush_tlb_mask(mask);
-        break;
-
-    case UVMF_INVLPG:
-        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
-        {
-        case UVMF_LOCAL:
-            paging_invlpg(v, va);
-            break;
-        case UVMF_ALL:
-            mask = d->domain_dirty_cpumask;
-            break;
-        default:
-            mask = this_cpu(scratch_cpumask);
-            rc = vcpumask_to_pcpumask(d, const_guest_handle_from_ptr(bmap_ptr,
-                                                                     void),
-                                      mask);
-            break;
-        }
-        if ( mask )
-            flush_tlb_one_mask(mask, va);
-        break;
-    }
-
-    return rc;
-}
-
-long do_update_va_mapping(unsigned long va, u64 val64,
-                          unsigned long flags)
-{
-    return __do_update_va_mapping(va, val64, flags, current->domain);
-}
-
-long do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
-                                      unsigned long flags,
-                                      domid_t domid)
-{
-    struct domain *pg_owner;
-    int rc;
-
-    if ( (pg_owner = get_pg_owner(domid)) == NULL )
-        return -ESRCH;
-
-    rc = __do_update_va_mapping(va, val64, flags, pg_owner);
-
-    put_pg_owner(pg_owner);
-
-    return rc;
-}
-
 typedef struct e820entry e820entry_t;
 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
 
diff --git a/xen/arch/x86/pv/Makefile b/xen/arch/x86/pv/Makefile
index bac2792aa2..17e058db7e 100644
--- a/xen/arch/x86/pv/Makefile
+++ b/xen/arch/x86/pv/Makefile
@@ -10,6 +10,7 @@  obj-y += hypercall.o
 obj-y += iret.o
 obj-y += misc-hypercalls.o
 obj-y += mm.o
+obj-y += mm-hypercalls.o
 obj-y += ro-page-fault.o
 obj-y += traps.o
 
diff --git a/xen/arch/x86/pv/mm-hypercalls.c b/xen/arch/x86/pv/mm-hypercalls.c
new file mode 100644
index 0000000000..29a609391c
--- /dev/null
+++ b/xen/arch/x86/pv/mm-hypercalls.c
@@ -0,0 +1,1461 @@ 
+/******************************************************************************
+ * arch/x86/pv/mm-hypercalls.c
+ *
+ * Memory management hypercalls for PV guests
+ *
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/event.h>
+#include <xen/guest_access.h>
+
+#include <asm/hypercall.h>
+#include <asm/iocap.h>
+#include <asm/ldt.h>
+#include <asm/mm.h>
+#include <asm/p2m.h>
+#include <asm/pv/mm.h>
+#include <asm/setup.h>
+
+#include <xsm/xsm.h>
+
+#include "mm.h"
+
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
+#undef page_to_mfn
+#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
+
+static struct domain *get_pg_owner(domid_t domid)
+{
+    struct domain *pg_owner = NULL, *curr = current->domain;
+
+    if ( likely(domid == DOMID_SELF) )
+    {
+        pg_owner = rcu_lock_current_domain();
+        goto out;
+    }
+
+    if ( unlikely(domid == curr->domain_id) )
+    {
+        gdprintk(XENLOG_WARNING, "Cannot specify itself as foreign domain\n");
+        goto out;
+    }
+
+    switch ( domid )
+    {
+    case DOMID_IO:
+        pg_owner = rcu_lock_domain(dom_io);
+        break;
+    case DOMID_XEN:
+        pg_owner = rcu_lock_domain(dom_xen);
+        break;
+    default:
+        if ( (pg_owner = rcu_lock_domain_by_id(domid)) == NULL )
+        {
+            gdprintk(XENLOG_WARNING, "Unknown domain d%d\n", domid);
+            break;
+        }
+        break;
+    }
+
+ out:
+    return pg_owner;
+}
+
+static void put_pg_owner(struct domain *pg_owner)
+{
+    rcu_unlock_domain(pg_owner);
+}
+
+static inline int vcpumask_to_pcpumask(
+    struct domain *d, XEN_GUEST_HANDLE_PARAM(const_void) bmap, cpumask_t *pmask)
+{
+    unsigned int vcpu_id, vcpu_bias, offs;
+    unsigned long vmask;
+    struct vcpu *v;
+    bool is_native = !is_pv_32bit_domain(d);
+
+    cpumask_clear(pmask);
+    for ( vmask = 0, offs = 0; ; ++offs )
+    {
+        vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32);
+        if ( vcpu_bias >= d->max_vcpus )
+            return 0;
+
+        if ( unlikely(is_native ?
+                      copy_from_guest_offset(&vmask, bmap, offs, 1) :
+                      copy_from_guest_offset((unsigned int *)&vmask, bmap,
+                                             offs, 1)) )
+        {
+            cpumask_clear(pmask);
+            return -EFAULT;
+        }
+
+        while ( vmask )
+        {
+            vcpu_id = find_first_set_bit(vmask);
+            vmask &= ~(1UL << vcpu_id);
+            vcpu_id += vcpu_bias;
+            if ( (vcpu_id >= d->max_vcpus) )
+                return 0;
+            if ( ((v = d->vcpu[vcpu_id]) != NULL) )
+                cpumask_or(pmask, pmask, v->vcpu_dirty_cpumask);
+        }
+    }
+}
+
+/*
+ * PTE flags that a guest may change without re-validating the PTE.
+ * All other bits affect translation, caching, or Xen's safety.
+ */
+#define FASTPATH_FLAG_WHITELIST                                     \
+    (_PAGE_NX_BIT | _PAGE_AVAIL_HIGH | _PAGE_AVAIL | _PAGE_GLOBAL | \
+     _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER)
+
+/* Update the L1 entry at pl1e to new value nl1e. */
+static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
+                        unsigned long gl1mfn, int preserve_ad,
+                        struct vcpu *pt_vcpu, struct domain *pg_dom)
+{
+    l1_pgentry_t ol1e;
+    struct domain *pt_dom = pt_vcpu->domain;
+    int rc = 0;
+
+    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
+        return -EFAULT;
+
+    ASSERT(!paging_mode_refcounts(pt_dom));
+
+    if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
+    {
+        /* Translate foreign guest addresses. */
+        struct page_info *page = NULL;
+
+        if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) )
+        {
+            gdprintk(XENLOG_WARNING, "Bad L1 flags %x\n",
+                    l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
+            return -EINVAL;
+        }
+
+        if ( paging_mode_translate(pg_dom) )
+        {
+            page = get_page_from_gfn(pg_dom, l1e_get_pfn(nl1e), NULL, P2M_ALLOC);
+            if ( !page )
+                return -EINVAL;
+            nl1e = l1e_from_page(page, l1e_get_flags(nl1e));
+        }
+
+        /* Fast path for sufficiently-similar mappings. */
+        if ( !l1e_has_changed(ol1e, nl1e, ~FASTPATH_FLAG_WHITELIST) )
+        {
+            nl1e = adjust_guest_l1e(nl1e, pt_dom);
+            rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+                              preserve_ad);
+            if ( page )
+                put_page(page);
+            return rc ? 0 : -EBUSY;
+        }
+
+        switch ( rc = get_page_from_l1e(nl1e, pt_dom, pg_dom) )
+        {
+        default:
+            if ( page )
+                put_page(page);
+            return rc;
+        case 0:
+            break;
+        case _PAGE_RW ... _PAGE_RW | PAGE_CACHE_ATTRS:
+            ASSERT(!(rc & ~(_PAGE_RW | PAGE_CACHE_ATTRS)));
+            l1e_flip_flags(nl1e, rc);
+            rc = 0;
+            break;
+        }
+        if ( page )
+            put_page(page);
+
+        nl1e = adjust_guest_l1e(nl1e, pt_dom);
+        if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+                                    preserve_ad)) )
+        {
+            ol1e = nl1e;
+            rc = -EBUSY;
+        }
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+                                     preserve_ad)) )
+    {
+        return -EBUSY;
+    }
+
+    put_page_from_l1e(ol1e, pt_dom);
+    return rc;
+}
+
+/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
+static int mod_l2_entry(l2_pgentry_t *pl2e, l2_pgentry_t nl2e,
+                        unsigned long pfn, int preserve_ad, struct vcpu *vcpu)
+{
+    l2_pgentry_t ol2e;
+    struct domain *d = vcpu->domain;
+    struct page_info *l2pg = mfn_to_page(_mfn(pfn));
+    unsigned long type = l2pg->u.inuse.type_info;
+    int rc = 0;
+
+    if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
+    {
+        gdprintk(XENLOG_WARNING, "L2 update in Xen-private area, slot %#lx\n",
+                 pgentry_ptr_to_slot(pl2e));
+        return -EPERM;
+    }
+
+    if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
+        return -EFAULT;
+
+    if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
+    {
+        if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
+        {
+            gdprintk(XENLOG_WARNING, "Bad L2 flags %x\n",
+                    l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
+            return -EINVAL;
+        }
+
+        /* Fast path for sufficiently-similar mappings. */
+        if ( !l2e_has_changed(ol2e, nl2e, ~FASTPATH_FLAG_WHITELIST) )
+        {
+            nl2e = adjust_guest_l2e(nl2e, d);
+            if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) )
+                return 0;
+            return -EBUSY;
+        }
+
+        if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) )
+            return rc;
+
+        nl2e = adjust_guest_l2e(nl2e, d);
+        if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
+                                    preserve_ad)) )
+        {
+            ol2e = nl2e;
+            rc = -EBUSY;
+        }
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
+                                     preserve_ad)) )
+    {
+        return -EBUSY;
+    }
+
+    put_page_from_l2e(ol2e, pfn);
+    return rc;
+}
+
+/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
+static int mod_l3_entry(l3_pgentry_t *pl3e, l3_pgentry_t nl3e,
+                        unsigned long pfn, int preserve_ad, struct vcpu *vcpu)
+{
+    l3_pgentry_t ol3e;
+    struct domain *d = vcpu->domain;
+    int rc = 0;
+
+    /*
+     * Disallow updates to final L3 slot. It contains Xen mappings, and it
+     * would be a pain to ensure they remain continuously valid throughout.
+     */
+    if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
+        return -EINVAL;
+
+    if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
+        return -EFAULT;
+
+    if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
+    {
+        if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
+        {
+            gdprintk(XENLOG_WARNING, "Bad L3 flags %x\n",
+                    l3e_get_flags(nl3e) & l3_disallow_mask(d));
+            return -EINVAL;
+        }
+
+        /* Fast path for sufficiently-similar mappings. */
+        if ( !l3e_has_changed(ol3e, nl3e, ~FASTPATH_FLAG_WHITELIST) )
+        {
+            nl3e = adjust_guest_l3e(nl3e, d);
+            rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
+            return rc ? 0 : -EFAULT;
+        }
+
+        rc = get_page_from_l3e(nl3e, pfn, d, 0);
+        if ( unlikely(rc < 0) )
+            return rc;
+        rc = 0;
+
+        nl3e = adjust_guest_l3e(nl3e, d);
+        if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
+                                    preserve_ad)) )
+        {
+            ol3e = nl3e;
+            rc = -EFAULT;
+        }
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
+                                     preserve_ad)) )
+    {
+        return -EFAULT;
+    }
+
+    if ( likely(rc == 0) )
+        if ( !create_pae_xen_mappings(d, pl3e) )
+            BUG();
+
+    put_page_from_l3e(ol3e, pfn, 0, 1);
+    return rc;
+}
+
+/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
+static int mod_l4_entry(l4_pgentry_t *pl4e, l4_pgentry_t nl4e,
+                        unsigned long pfn, int preserve_ad, struct vcpu *vcpu)
+{
+    struct domain *d = vcpu->domain;
+    l4_pgentry_t ol4e;
+    int rc = 0;
+
+    if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
+    {
+        gdprintk(XENLOG_WARNING, "L4 update in Xen-private area, slot %#lx\n",
+                 pgentry_ptr_to_slot(pl4e));
+        return -EINVAL;
+    }
+
+    if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
+        return -EFAULT;
+
+    if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
+    {
+        if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
+        {
+            gdprintk(XENLOG_WARNING, "Bad L4 flags %x\n",
+                    l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
+            return -EINVAL;
+        }
+
+        /* Fast path for sufficiently-similar mappings. */
+        if ( !l4e_has_changed(ol4e, nl4e, ~FASTPATH_FLAG_WHITELIST) )
+        {
+            nl4e = adjust_guest_l4e(nl4e, d);
+            rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
+            return rc ? 0 : -EFAULT;
+        }
+
+        rc = get_page_from_l4e(nl4e, pfn, d, 0);
+        if ( unlikely(rc < 0) )
+            return rc;
+        rc = 0;
+
+        nl4e = adjust_guest_l4e(nl4e, d);
+        if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
+                                    preserve_ad)) )
+        {
+            ol4e = nl4e;
+            rc = -EFAULT;
+        }
+    }
+    else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
+                                     preserve_ad)) )
+    {
+        return -EFAULT;
+    }
+
+    put_page_from_l4e(ol4e, pfn, 0, 1);
+    return rc;
+}
+
+int new_guest_cr3(mfn_t mfn)
+{
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    int rc;
+    mfn_t old_base_mfn;
+
+    if ( is_pv_32bit_domain(currd) )
+    {
+        mfn_t gt_mfn = pagetable_get_mfn(curr->arch.guest_table);
+        l4_pgentry_t *pl4e = map_domain_page(gt_mfn);
+
+        rc = mod_l4_entry(pl4e,
+                          l4e_from_mfn(mfn,
+                                       (_PAGE_PRESENT | _PAGE_RW |
+                                        _PAGE_USER | _PAGE_ACCESSED)),
+                          mfn_x(gt_mfn), 0, curr);
+        unmap_domain_page(pl4e);
+        switch ( rc )
+        {
+        case 0:
+            break;
+        case -EINTR:
+        case -ERESTART:
+            return -ERESTART;
+        default:
+            gdprintk(XENLOG_WARNING,
+                     "Error while installing new compat baseptr %" PRI_mfn "\n",
+                     mfn_x(mfn));
+            return rc;
+        }
+
+        pv_invalidate_shadow_ldt(curr, 0);
+        write_ptbase(curr);
+
+        return 0;
+    }
+
+    rc = put_old_guest_table(curr);
+    if ( unlikely(rc) )
+        return rc;
+
+    old_base_mfn = pagetable_get_mfn(curr->arch.guest_table);
+    /*
+     * This is particularly important when getting restarted after the
+     * previous attempt got preempted in the put-old-MFN phase.
+     */
+    if ( mfn_eq(old_base_mfn, mfn) )
+    {
+        write_ptbase(curr);
+        return 0;
+    }
+
+    rc = paging_mode_refcounts(currd)
+         ? (get_page_from_mfn(mfn, currd) ? 0 : -EINVAL)
+         : get_page_and_type_from_mfn(mfn, PGT_root_page_table, currd, 0, 1);
+    switch ( rc )
+    {
+    case 0:
+        break;
+    case -EINTR:
+    case -ERESTART:
+        return -ERESTART;
+    default:
+        gdprintk(XENLOG_WARNING,
+                 "Error while installing new baseptr %" PRI_mfn "\n",
+                 mfn_x(mfn));
+        return rc;
+    }
+
+    pv_invalidate_shadow_ldt(curr, 0);
+
+    if ( !VM_ASSIST(currd, m2p_strict) && !paging_mode_refcounts(currd) )
+        fill_ro_mpt(mfn);
+    curr->arch.guest_table = pagetable_from_mfn(mfn);
+    update_cr3(curr);
+
+    write_ptbase(curr);
+
+    if ( likely(mfn_x(old_base_mfn) != 0) )
+    {
+        struct page_info *page = mfn_to_page(old_base_mfn);
+
+        if ( paging_mode_refcounts(currd) )
+            put_page(page);
+        else
+            switch ( rc = put_page_and_type_preemptible(page) )
+            {
+            case -EINTR:
+                rc = -ERESTART;
+                /* fallthrough */
+            case -ERESTART:
+                curr->arch.old_guest_table = page;
+                break;
+            default:
+                BUG_ON(rc);
+                break;
+            }
+    }
+
+    return rc;
+}
+
+long do_mmuext_op(XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops, unsigned int count,
+                  XEN_GUEST_HANDLE_PARAM(uint) pdone, unsigned int foreigndom)
+{
+    struct mmuext_op op;
+    unsigned long type;
+    unsigned int i, done = 0;
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    struct domain *pg_owner;
+    int rc = put_old_guest_table(curr);
+
+    if ( unlikely(rc) )
+    {
+        if ( likely(rc == -ERESTART) )
+            rc = hypercall_create_continuation(
+                     __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone,
+                     foreigndom);
+        return rc;
+    }
+
+    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
+         likely(guest_handle_is_null(uops)) )
+    {
+        /*
+         * See the curr->arch.old_guest_table related
+         * hypercall_create_continuation() below.
+         */
+        return (int)foreigndom;
+    }
+
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+    {
+        count &= ~MMU_UPDATE_PREEMPTED;
+        if ( unlikely(!guest_handle_is_null(pdone)) )
+            (void)copy_from_guest(&done, pdone, 1);
+    }
+    else
+        perfc_incr(calls_to_mmuext_op);
+
+    if ( unlikely(!guest_handle_okay(uops, count)) )
+        return -EFAULT;
+
+    if ( (pg_owner = get_pg_owner(foreigndom)) == NULL )
+        return -ESRCH;
+
+    if ( !is_pv_domain(pg_owner) )
+    {
+        put_pg_owner(pg_owner);
+        return -EINVAL;
+    }
+
+    rc = xsm_mmuext_op(XSM_TARGET, currd, pg_owner);
+    if ( rc )
+    {
+        put_pg_owner(pg_owner);
+        return rc;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
+        {
+            rc = -ERESTART;
+            break;
+        }
+
+        if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+
+        if ( is_hvm_domain(currd) )
+        {
+            switch ( op.cmd )
+            {
+            case MMUEXT_PIN_L1_TABLE:
+            case MMUEXT_PIN_L2_TABLE:
+            case MMUEXT_PIN_L3_TABLE:
+            case MMUEXT_PIN_L4_TABLE:
+            case MMUEXT_UNPIN_TABLE:
+                break;
+            default:
+                rc = -EOPNOTSUPP;
+                goto done;
+            }
+        }
+
+        rc = 0;
+
+        switch ( op.cmd )
+        {
+            struct page_info *page;
+            p2m_type_t p2mt;
+
+        case MMUEXT_PIN_L1_TABLE:
+            type = PGT_l1_page_table;
+            goto pin_page;
+
+        case MMUEXT_PIN_L2_TABLE:
+            type = PGT_l2_page_table;
+            goto pin_page;
+
+        case MMUEXT_PIN_L3_TABLE:
+            type = PGT_l3_page_table;
+            goto pin_page;
+
+        case MMUEXT_PIN_L4_TABLE:
+            if ( is_pv_32bit_domain(pg_owner) )
+                break;
+            type = PGT_l4_page_table;
+
+        pin_page:
+            /* Ignore pinning of invalid paging levels. */
+            if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
+                break;
+
+            if ( paging_mode_refcounts(pg_owner) )
+                break;
+
+            page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
+            if ( unlikely(!page) )
+            {
+                rc = -EINVAL;
+                break;
+            }
+
+            rc = get_page_type_preemptible(page, type);
+            if ( unlikely(rc) )
+            {
+                if ( rc == -EINTR )
+                    rc = -ERESTART;
+                else if ( rc != -ERESTART )
+                    gdprintk(XENLOG_WARNING,
+                             "Error %d while pinning mfn %" PRI_mfn "\n",
+                             rc, mfn_x(page_to_mfn(page)));
+                if ( page != curr->arch.old_guest_table )
+                    put_page(page);
+                break;
+            }
+
+            rc = xsm_memory_pin_page(XSM_HOOK, currd, pg_owner, page);
+            if ( !rc && unlikely(test_and_set_bit(_PGT_pinned,
+                                                  &page->u.inuse.type_info)) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "mfn %" PRI_mfn " already pinned\n",
+                         mfn_x(page_to_mfn(page)));
+                rc = -EINVAL;
+            }
+
+            if ( unlikely(rc) )
+                goto pin_drop;
+
+            /* A page is dirtied when its pin status is set. */
+            paging_mark_dirty(pg_owner, page_to_mfn(page));
+
+            /* We can race domain destruction (domain_relinquish_resources). */
+            if ( unlikely(pg_owner != currd) )
+            {
+                bool drop_ref;
+
+                spin_lock(&pg_owner->page_alloc_lock);
+                drop_ref = (pg_owner->is_dying &&
+                            test_and_clear_bit(_PGT_pinned,
+                                               &page->u.inuse.type_info));
+                spin_unlock(&pg_owner->page_alloc_lock);
+                if ( drop_ref )
+                {
+        pin_drop:
+                    if ( type == PGT_l1_page_table )
+                        put_page_and_type(page);
+                    else
+                        curr->arch.old_guest_table = page;
+                }
+            }
+            break;
+
+        case MMUEXT_UNPIN_TABLE:
+            if ( paging_mode_refcounts(pg_owner) )
+                break;
+
+            page = get_page_from_gfn(pg_owner, op.arg1.mfn, NULL, P2M_ALLOC);
+            if ( unlikely(!page) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "mfn %" PRI_mfn " bad, or bad owner d%d\n",
+                         op.arg1.mfn, pg_owner->domain_id);
+                rc = -EINVAL;
+                break;
+            }
+
+            if ( !test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
+            {
+                put_page(page);
+                gdprintk(XENLOG_WARNING,
+                         "mfn %" PRI_mfn " not pinned\n", op.arg1.mfn);
+                rc = -EINVAL;
+                break;
+            }
+
+            switch ( rc = put_page_and_type_preemptible(page) )
+            {
+            case -EINTR:
+            case -ERESTART:
+                curr->arch.old_guest_table = page;
+                rc = 0;
+                break;
+            default:
+                BUG_ON(rc);
+                break;
+            }
+            put_page(page);
+
+            /* A page is dirtied when its pin status is cleared. */
+            paging_mark_dirty(pg_owner, page_to_mfn(page));
+            break;
+
+        case MMUEXT_NEW_BASEPTR:
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( unlikely(paging_mode_translate(currd)) )
+                rc = -EINVAL;
+            else
+                rc = new_guest_cr3(_mfn(op.arg1.mfn));
+            break;
+
+        case MMUEXT_NEW_USER_BASEPTR: {
+            unsigned long old_mfn;
+
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( unlikely(paging_mode_translate(currd)) )
+                rc = -EINVAL;
+            if ( unlikely(rc) )
+                break;
+
+            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+            /*
+             * This is particularly important when getting restarted after the
+             * previous attempt got preempted in the put-old-MFN phase.
+             */
+            if ( old_mfn == op.arg1.mfn )
+                break;
+
+            if ( op.arg1.mfn != 0 )
+            {
+                rc = get_page_and_type_from_mfn(
+                    _mfn(op.arg1.mfn), PGT_root_page_table, currd, 0, 1);
+
+                if ( unlikely(rc) )
+                {
+                    if ( rc == -EINTR )
+                        rc = -ERESTART;
+                    else if ( rc != -ERESTART )
+                        gdprintk(XENLOG_WARNING,
+                                 "Error %d installing new mfn %" PRI_mfn "\n",
+                                 rc, op.arg1.mfn);
+                    break;
+                }
+
+                if ( VM_ASSIST(currd, m2p_strict) )
+                    zap_ro_mpt(_mfn(op.arg1.mfn));
+            }
+
+            curr->arch.guest_table_user = pagetable_from_pfn(op.arg1.mfn);
+
+            if ( old_mfn != 0 )
+            {
+                page = mfn_to_page(_mfn(old_mfn));
+
+                switch ( rc = put_page_and_type_preemptible(page) )
+                {
+                case -EINTR:
+                    rc = -ERESTART;
+                    /* fallthrough */
+                case -ERESTART:
+                    curr->arch.old_guest_table = page;
+                    break;
+                default:
+                    BUG_ON(rc);
+                    break;
+                }
+            }
+
+            break;
+        }
+
+        case MMUEXT_TLB_FLUSH_LOCAL:
+            if ( likely(currd == pg_owner) )
+                flush_tlb_local();
+            else
+                rc = -EPERM;
+            break;
+
+        case MMUEXT_INVLPG_LOCAL:
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else
+                paging_invlpg(curr, op.arg1.linear_addr);
+            break;
+
+        case MMUEXT_TLB_FLUSH_MULTI:
+        case MMUEXT_INVLPG_MULTI:
+        {
+            cpumask_t *mask = this_cpu(scratch_cpumask);
+
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( unlikely(vcpumask_to_pcpumask(currd,
+                                   guest_handle_to_param(op.arg2.vcpumask,
+                                                         const_void),
+                                   mask)) )
+                rc = -EINVAL;
+            if ( unlikely(rc) )
+                break;
+
+            if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
+                flush_tlb_mask(mask);
+            else if ( __addr_ok(op.arg1.linear_addr) )
+                flush_tlb_one_mask(mask, op.arg1.linear_addr);
+            break;
+        }
+
+        case MMUEXT_TLB_FLUSH_ALL:
+            if ( likely(currd == pg_owner) )
+                flush_tlb_mask(currd->domain_dirty_cpumask);
+            else
+                rc = -EPERM;
+            break;
+
+        case MMUEXT_INVLPG_ALL:
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( __addr_ok(op.arg1.linear_addr) )
+                flush_tlb_one_mask(currd->domain_dirty_cpumask,
+                                   op.arg1.linear_addr);
+            break;
+
+        case MMUEXT_FLUSH_CACHE:
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( unlikely(!cache_flush_permitted(currd)) )
+                rc = -EACCES;
+            else
+                wbinvd();
+            break;
+
+        case MMUEXT_FLUSH_CACHE_GLOBAL:
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( likely(cache_flush_permitted(currd)) )
+            {
+                unsigned int cpu;
+                cpumask_t *mask = this_cpu(scratch_cpumask);
+
+                cpumask_clear(mask);
+                for_each_online_cpu(cpu)
+                    if ( !cpumask_intersects(mask,
+                                             per_cpu(cpu_sibling_mask, cpu)) )
+                        __cpumask_set_cpu(cpu, mask);
+                flush_mask(mask, FLUSH_CACHE);
+            }
+            else
+                rc = -EINVAL;
+            break;
+
+        case MMUEXT_SET_LDT:
+        {
+            unsigned int ents = op.arg2.nr_ents;
+            unsigned long ptr = ents ? op.arg1.linear_addr : 0;
+
+            if ( unlikely(currd != pg_owner) )
+                rc = -EPERM;
+            else if ( paging_mode_external(currd) )
+                rc = -EINVAL;
+            else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
+                      (ents > 8192) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "Bad args to SET_LDT: ptr=%lx, ents=%x\n", ptr, ents);
+                rc = -EINVAL;
+            }
+            else if ( (curr->arch.pv_vcpu.ldt_ents != ents) ||
+                      (curr->arch.pv_vcpu.ldt_base != ptr) )
+            {
+                pv_invalidate_shadow_ldt(curr, 0);
+                flush_tlb_local();
+                curr->arch.pv_vcpu.ldt_base = ptr;
+                curr->arch.pv_vcpu.ldt_ents = ents;
+                load_LDT(curr);
+            }
+            break;
+        }
+
+        case MMUEXT_CLEAR_PAGE:
+            page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt, P2M_ALLOC);
+            if ( unlikely(p2mt != p2m_ram_rw) && page )
+            {
+                put_page(page);
+                page = NULL;
+            }
+            if ( !page || !get_page_type(page, PGT_writable_page) )
+            {
+                if ( page )
+                    put_page(page);
+                gdprintk(XENLOG_WARNING,
+                         "Error clearing mfn %" PRI_mfn "\n", op.arg1.mfn);
+                rc = -EINVAL;
+                break;
+            }
+
+            /* A page is dirtied when it's being cleared. */
+            paging_mark_dirty(pg_owner, page_to_mfn(page));
+
+            clear_domain_page(page_to_mfn(page));
+
+            put_page_and_type(page);
+            break;
+
+        case MMUEXT_COPY_PAGE:
+        {
+            struct page_info *src_page, *dst_page;
+
+            src_page = get_page_from_gfn(pg_owner, op.arg2.src_mfn, &p2mt,
+                                         P2M_ALLOC);
+            if ( unlikely(p2mt != p2m_ram_rw) && src_page )
+            {
+                put_page(src_page);
+                src_page = NULL;
+            }
+            if ( unlikely(!src_page) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "Error copying from mfn %" PRI_mfn "\n",
+                         op.arg2.src_mfn);
+                rc = -EINVAL;
+                break;
+            }
+
+            dst_page = get_page_from_gfn(pg_owner, op.arg1.mfn, &p2mt,
+                                         P2M_ALLOC);
+            if ( unlikely(p2mt != p2m_ram_rw) && dst_page )
+            {
+                put_page(dst_page);
+                dst_page = NULL;
+            }
+            rc = (dst_page &&
+                  get_page_type(dst_page, PGT_writable_page)) ? 0 : -EINVAL;
+            if ( unlikely(rc) )
+            {
+                put_page(src_page);
+                if ( dst_page )
+                    put_page(dst_page);
+                gdprintk(XENLOG_WARNING,
+                         "Error copying to mfn %" PRI_mfn "\n", op.arg1.mfn);
+                break;
+            }
+
+            /* A page is dirtied when it's being copied to. */
+            paging_mark_dirty(pg_owner, page_to_mfn(dst_page));
+
+            copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page));
+
+            put_page_and_type(dst_page);
+            put_page(src_page);
+            break;
+        }
+
+        case MMUEXT_MARK_SUPER:
+        case MMUEXT_UNMARK_SUPER:
+            rc = -EOPNOTSUPP;
+            break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+
+ done:
+        if ( unlikely(rc) )
+            break;
+
+        guest_handle_add_offset(uops, 1);
+    }
+
+    if ( rc == -ERESTART )
+    {
+        ASSERT(i < count);
+        rc = hypercall_create_continuation(
+            __HYPERVISOR_mmuext_op, "hihi",
+            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+    }
+    else if ( curr->arch.old_guest_table )
+    {
+        XEN_GUEST_HANDLE_PARAM(void) null;
+
+        ASSERT(rc || i == count);
+        set_xen_guest_handle(null, NULL);
+        /*
+         * In order to have a way to communicate the final return value to
+         * our continuation, we pass this in place of "foreigndom", building
+         * on the fact that this argument isn't needed anymore.
+         */
+        rc = hypercall_create_continuation(
+                __HYPERVISOR_mmuext_op, "hihi", null,
+                MMU_UPDATE_PREEMPTED, null, rc);
+    }
+
+    put_pg_owner(pg_owner);
+
+    perfc_add(num_mmuext_ops, i);
+
+    /* Add incremental work we have done to the @done output parameter. */
+    if ( unlikely(!guest_handle_is_null(pdone)) )
+    {
+        done += i;
+        copy_to_guest(pdone, &done, 1);
+    }
+
+    return rc;
+}
+
+long do_mmu_update(XEN_GUEST_HANDLE_PARAM(mmu_update_t) ureqs,
+                   unsigned int count, XEN_GUEST_HANDLE_PARAM(uint) pdone,
+                   unsigned int foreigndom)
+{
+    struct mmu_update req;
+    void *va = NULL;
+    unsigned long gpfn, gmfn, mfn;
+    struct page_info *page;
+    unsigned int cmd, i = 0, done = 0, pt_dom;
+    struct vcpu *curr = current, *v = curr;
+    struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+    mfn_t map_mfn = INVALID_MFN;
+    uint32_t xsm_needed = 0;
+    uint32_t xsm_checked = 0;
+    int rc = put_old_guest_table(curr);
+
+    if ( unlikely(rc) )
+    {
+        if ( likely(rc == -ERESTART) )
+            rc = hypercall_create_continuation(
+                     __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone,
+                     foreigndom);
+        return rc;
+    }
+
+    if ( unlikely(count == MMU_UPDATE_PREEMPTED) &&
+         likely(guest_handle_is_null(ureqs)) )
+    {
+        /*
+         * See the curr->arch.old_guest_table related
+         * hypercall_create_continuation() below.
+         */
+        return (int)foreigndom;
+    }
+
+    if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
+    {
+        count &= ~MMU_UPDATE_PREEMPTED;
+        if ( unlikely(!guest_handle_is_null(pdone)) )
+            (void)copy_from_guest(&done, pdone, 1);
+    }
+    else
+        perfc_incr(calls_to_mmu_update);
+
+    if ( unlikely(!guest_handle_okay(ureqs, count)) )
+        return -EFAULT;
+
+    if ( (pt_dom = foreigndom >> 16) != 0 )
+    {
+        /* Pagetables belong to a foreign domain (PFD). */
+        if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL )
+            return -ESRCH;
+
+        if ( pt_owner == d )
+            rcu_unlock_domain(pt_owner);
+        else if ( !pt_owner->vcpu || (v = pt_owner->vcpu[0]) == NULL )
+        {
+            rc = -EINVAL;
+            goto out;
+        }
+    }
+
+    if ( (pg_owner = get_pg_owner((uint16_t)foreigndom)) == NULL )
+    {
+        rc = -ESRCH;
+        goto out;
+    }
+
+    for ( i = 0; i < count; i++ )
+    {
+        if ( curr->arch.old_guest_table || (i && hypercall_preempt_check()) )
+        {
+            rc = -ERESTART;
+            break;
+        }
+
+        if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
+        {
+            rc = -EFAULT;
+            break;
+        }
+
+        cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
+
+        switch ( cmd )
+        {
+            /*
+             * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
+             * MMU_UPDATE_PT_PRESERVE_AD: As above but also preserve (OR)
+             * current A/D bits.
+             */
+        case MMU_NORMAL_PT_UPDATE:
+        case MMU_PT_UPDATE_PRESERVE_AD:
+        {
+            p2m_type_t p2mt;
+
+            rc = -EOPNOTSUPP;
+            if ( unlikely(paging_mode_refcounts(pt_owner)) )
+                break;
+
+            xsm_needed |= XSM_MMU_NORMAL_UPDATE;
+            if ( get_pte_flags(req.val) & _PAGE_PRESENT )
+            {
+                xsm_needed |= XSM_MMU_UPDATE_READ;
+                if ( get_pte_flags(req.val) & _PAGE_RW )
+                    xsm_needed |= XSM_MMU_UPDATE_WRITE;
+            }
+            if ( xsm_needed != xsm_checked )
+            {
+                rc = xsm_mmu_update(XSM_TARGET, d, pt_owner, pg_owner, xsm_needed);
+                if ( rc )
+                    break;
+                xsm_checked = xsm_needed;
+            }
+            rc = -EINVAL;
+
+            req.ptr -= cmd;
+            gmfn = req.ptr >> PAGE_SHIFT;
+            page = get_page_from_gfn(pt_owner, gmfn, &p2mt, P2M_ALLOC);
+
+            if ( p2m_is_paged(p2mt) )
+            {
+                ASSERT(!page);
+                p2m_mem_paging_populate(pg_owner, gmfn);
+                rc = -ENOENT;
+                break;
+            }
+
+            if ( unlikely(!page) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "Could not get page for normal update\n");
+                break;
+            }
+
+            mfn = mfn_x(page_to_mfn(page));
+
+            if ( !mfn_eq(_mfn(mfn), map_mfn) )
+            {
+                if ( va )
+                    unmap_domain_page(va);
+                va = map_domain_page(_mfn(mfn));
+                map_mfn = _mfn(mfn);
+            }
+            va = _p(((unsigned long)va & PAGE_MASK) + (req.ptr & ~PAGE_MASK));
+
+            if ( page_lock(page) )
+            {
+                switch ( page->u.inuse.type_info & PGT_type_mask )
+                {
+                case PGT_l1_page_table:
+                {
+                    l1_pgentry_t l1e = l1e_from_intpte(req.val);
+                    p2m_type_t l1e_p2mt = p2m_ram_rw;
+                    struct page_info *target = NULL;
+                    p2m_query_t q = (l1e_get_flags(l1e) & _PAGE_RW) ?
+                                        P2M_UNSHARE : P2M_ALLOC;
+
+                    if ( paging_mode_translate(pg_owner) )
+                        target = get_page_from_gfn(pg_owner, l1e_get_pfn(l1e),
+                                                   &l1e_p2mt, q);
+
+                    if ( p2m_is_paged(l1e_p2mt) )
+                    {
+                        if ( target )
+                            put_page(target);
+                        p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
+                        rc = -ENOENT;
+                        break;
+                    }
+                    else if ( p2m_ram_paging_in == l1e_p2mt && !target )
+                    {
+                        rc = -ENOENT;
+                        break;
+                    }
+                    /* If we tried to unshare and failed */
+                    else if ( (q & P2M_UNSHARE) && p2m_is_shared(l1e_p2mt) )
+                    {
+                        /* We could not have obtained a page ref. */
+                        ASSERT(target == NULL);
+                        /* And mem_sharing_notify has already been called. */
+                        rc = -ENOMEM;
+                        break;
+                    }
+
+                    rc = mod_l1_entry(va, l1e, mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
+                                      pg_owner);
+                    if ( target )
+                        put_page(target);
+                }
+                break;
+                case PGT_l2_page_table:
+                    rc = mod_l2_entry(va, l2e_from_intpte(req.val), mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+                    break;
+                case PGT_l3_page_table:
+                    rc = mod_l3_entry(va, l3e_from_intpte(req.val), mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+                    break;
+                case PGT_l4_page_table:
+                    rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+                break;
+                case PGT_writable_page:
+                    perfc_incr(writable_mmu_updates);
+                    if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+                        rc = 0;
+                    break;
+                }
+                page_unlock(page);
+                if ( rc == -EINTR )
+                    rc = -ERESTART;
+            }
+            else if ( get_page_type(page, PGT_writable_page) )
+            {
+                perfc_incr(writable_mmu_updates);
+                if ( paging_write_guest_entry(v, va, req.val, _mfn(mfn)) )
+                    rc = 0;
+                put_page_type(page);
+            }
+
+            put_page(page);
+        }
+        break;
+
+        case MMU_MACHPHYS_UPDATE:
+            if ( unlikely(d != pt_owner) )
+            {
+                rc = -EPERM;
+                break;
+            }
+
+            if ( unlikely(paging_mode_translate(pg_owner)) )
+            {
+                rc = -EINVAL;
+                break;
+            }
+
+            mfn = req.ptr >> PAGE_SHIFT;
+            gpfn = req.val;
+
+            xsm_needed |= XSM_MMU_MACHPHYS_UPDATE;
+            if ( xsm_needed != xsm_checked )
+            {
+                rc = xsm_mmu_update(XSM_TARGET, d, NULL, pg_owner, xsm_needed);
+                if ( rc )
+                    break;
+                xsm_checked = xsm_needed;
+            }
+
+            if ( unlikely(!get_page_from_mfn(_mfn(mfn), pg_owner)) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "Could not get page for mach->phys update\n");
+                rc = -EINVAL;
+                break;
+            }
+
+            set_gpfn_from_mfn(mfn, gpfn);
+
+            paging_mark_dirty(pg_owner, _mfn(mfn));
+
+            put_page(mfn_to_page(_mfn(mfn)));
+            break;
+
+        default:
+            rc = -ENOSYS;
+            break;
+        }
+
+        if ( unlikely(rc) )
+            break;
+
+        guest_handle_add_offset(ureqs, 1);
+    }
+
+    if ( rc == -ERESTART )
+    {
+        ASSERT(i < count);
+        rc = hypercall_create_continuation(
+            __HYPERVISOR_mmu_update, "hihi",
+            ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+    }
+    else if ( curr->arch.old_guest_table )
+    {
+        XEN_GUEST_HANDLE_PARAM(void) null;
+
+        ASSERT(rc || i == count);
+        set_xen_guest_handle(null, NULL);
+        /*
+         * In order to have a way to communicate the final return value to
+         * our continuation, we pass this in place of "foreigndom", building
+         * on the fact that this argument isn't needed anymore.
+         */
+        rc = hypercall_create_continuation(
+                __HYPERVISOR_mmu_update, "hihi", null,
+                MMU_UPDATE_PREEMPTED, null, rc);
+    }
+
+    put_pg_owner(pg_owner);
+
+    if ( va )
+        unmap_domain_page(va);
+
+    perfc_add(num_page_updates, i);
+
+ out:
+    if ( pt_owner != d )
+        rcu_unlock_domain(pt_owner);
+
+    /* Add incremental work we have done to the @done output parameter. */
+    if ( unlikely(!guest_handle_is_null(pdone)) )
+    {
+        done += i;
+        copy_to_guest(pdone, &done, 1);
+    }
+
+    return rc;
+}
+
+static int __do_update_va_mapping(unsigned long va, uint64_t val64,
+                                  unsigned long flags, struct domain *pg_owner)
+{
+    l1_pgentry_t   val = l1e_from_intpte(val64);
+    struct vcpu   *curr = current;
+    struct domain *currd = curr->domain;
+    struct page_info *gl1pg;
+    l1_pgentry_t  *pl1e;
+    unsigned long  bmap_ptr;
+    mfn_t          gl1mfn;
+    cpumask_t     *mask = NULL;
+    int            rc;
+
+    perfc_incr(calls_to_update_va);
+
+    rc = xsm_update_va_mapping(XSM_TARGET, currd, pg_owner, val);
+    if ( rc )
+        return rc;
+
+    rc = -EINVAL;
+    pl1e = map_guest_l1e(va, &gl1mfn);
+    if ( unlikely(!pl1e || !get_page_from_mfn(gl1mfn, currd)) )
+        goto out;
+
+    gl1pg = mfn_to_page(gl1mfn);
+    if ( !page_lock(gl1pg) )
+    {
+        put_page(gl1pg);
+        goto out;
+    }
+
+    if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+    {
+        page_unlock(gl1pg);
+        put_page(gl1pg);
+        goto out;
+    }
+
+    rc = mod_l1_entry(pl1e, val, mfn_x(gl1mfn), 0, curr, pg_owner);
+
+    page_unlock(gl1pg);
+    put_page(gl1pg);
+
+ out:
+    if ( pl1e )
+        unmap_domain_page(pl1e);
+
+    switch ( flags & UVMF_FLUSHTYPE_MASK )
+    {
+    case UVMF_TLB_FLUSH:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            flush_tlb_local();
+            break;
+        case UVMF_ALL:
+            mask = currd->domain_dirty_cpumask;
+            break;
+        default:
+            mask = this_cpu(scratch_cpumask);
+            rc = vcpumask_to_pcpumask(currd,
+                                      const_guest_handle_from_ptr(bmap_ptr, void),
+                                      mask);
+            break;
+        }
+        if ( mask )
+            flush_tlb_mask(mask);
+        break;
+
+    case UVMF_INVLPG:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            paging_invlpg(curr, va);
+            break;
+        case UVMF_ALL:
+            mask = currd->domain_dirty_cpumask;
+            break;
+        default:
+            mask = this_cpu(scratch_cpumask);
+            rc = vcpumask_to_pcpumask(currd,
+                                      const_guest_handle_from_ptr(bmap_ptr, void),
+                                      mask);
+            break;
+        }
+        if ( mask )
+            flush_tlb_one_mask(mask, va);
+        break;
+    }
+
+    return rc;
+}
+
+long do_update_va_mapping(unsigned long va, uint64_t val64,
+                          unsigned long flags)
+{
+    return __do_update_va_mapping(va, val64, flags, current->domain);
+}
+
+long do_update_va_mapping_otherdomain(unsigned long va, uint64_t val64,
+                                      unsigned long flags,
+                                      domid_t domid)
+{
+    struct domain *pg_owner;
+    int rc;
+
+    if ( (pg_owner = get_pg_owner(domid)) == NULL )
+        return -ESRCH;
+
+    rc = __do_update_va_mapping(va, val64, flags, pg_owner);
+
+    put_pg_owner(pg_owner);
+
+    return rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */