--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -949,6 +949,14 @@ int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+ if ( rc == 0 && flags == 0 )
+ {
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ if ( read_atomic(&p2m->ioreq.entry_count) )
+ p2m_change_entry_type_global(d, p2m_ioreq_server, p2m_ram_rw);
+ }
+
return rc;
}
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -187,6 +187,15 @@ out:
*/
static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ /*
+ * Refuse to turn on global log-dirty mode if
+ * there are outstanding p2m_ioreq_server pages.
+ */
+ if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
+ return -EBUSY;
+
/* turn on PG_log_dirty bit in paging mode */
paging_lock(d);
d->arch.paging.mode |= PG_log_dirty;
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -544,6 +544,12 @@ static int resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn)
e.ipat = ipat;
if ( e.recalc && p2m_is_changeable(e.sa_p2mt) )
{
+ if ( e.sa_p2mt == p2m_ioreq_server )
+ {
+ p2m->ioreq.entry_count--;
+ ASSERT(p2m->ioreq.entry_count >= 0);
+ }
+
e.sa_p2mt = p2m_is_logdirty_range(p2m, gfn + i, gfn + i)
? p2m_ram_logdirty : p2m_ram_rw;
ept_p2m_type_to_flags(p2m, &e, e.sa_p2mt, e.access);
@@ -965,7 +971,7 @@ static mfn_t ept_get_entry(struct p2m_domain *p2m,
if ( is_epte_valid(ept_entry) )
{
if ( (recalc || ept_entry->recalc) &&
- p2m_is_changeable(ept_entry->sa_p2mt) )
+ p2m_check_changeable(ept_entry->sa_p2mt) )
*t = p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
: p2m_ram_rw;
else
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -436,11 +436,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
needs_recalc(l1, *pent) )
{
l1_pgentry_t e = *pent;
+ p2m_type_t p2mt_old;
if ( !valid_recalc(l1, e) )
P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
p2m->domain->domain_id, gfn, level);
- if ( p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(e))) )
+ p2mt_old = p2m_flags_to_type(l1e_get_flags(e));
+ if ( p2m_is_changeable(p2mt_old) )
{
unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
@@ -460,6 +462,13 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
mfn &= ~(_PAGE_PSE_PAT >> PAGE_SHIFT);
flags |= _PAGE_PSE;
}
+
+ if ( p2mt_old == p2m_ioreq_server )
+ {
+ p2m->ioreq.entry_count--;
+ ASSERT(p2m->ioreq.entry_count >= 0);
+ }
+
e = l1e_from_pfn(mfn, flags);
p2m_add_iommu_flags(&e, level,
(p2mt == p2m_ram_rw)
@@ -729,7 +738,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
static inline p2m_type_t recalc_type(bool_t recalc, p2m_type_t t,
struct p2m_domain *p2m, unsigned long gfn)
{
- if ( !recalc || !p2m_is_changeable(t) )
+ if ( !recalc || !p2m_check_changeable(t) )
return t;
return p2m_is_logdirty_range(p2m, gfn, gfn) ? p2m_ram_logdirty
: p2m_ram_rw;
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -954,6 +954,26 @@ int p2m_change_type_one(struct domain *d, unsigned long gfn,
p2m->default_access)
: -EBUSY;
+ if ( !rc )
+ {
+ switch ( nt )
+ {
+ case p2m_ram_rw:
+ if ( ot == p2m_ioreq_server )
+ {
+ p2m->ioreq.entry_count--;
+ ASSERT(p2m->ioreq.entry_count >= 0);
+ }
+ break;
+ case p2m_ioreq_server:
+ ASSERT(ot == p2m_ram_rw);
+ p2m->ioreq.entry_count++;
+ break;
+ default:
+ break;
+ }
+ }
+
gfn_unlock(p2m, gfn, 0);
return rc;
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -120,7 +120,10 @@ typedef unsigned int p2m_query_t;
/* Types that can be subject to bulk transitions. */
#define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) \
- | p2m_to_mask(p2m_ram_logdirty) )
+ | p2m_to_mask(p2m_ram_logdirty) \
+ | p2m_to_mask(p2m_ioreq_server) )
+
+#define P2M_IOREQ_TYPES (p2m_to_mask(p2m_ioreq_server))
#define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand))
@@ -157,6 +160,7 @@ typedef unsigned int p2m_query_t;
#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
#define p2m_is_discard_write(_t) (p2m_to_mask(_t) & P2M_DISCARD_WRITE_TYPES)
#define p2m_is_changeable(_t) (p2m_to_mask(_t) & P2M_CHANGEABLE_TYPES)
+#define p2m_is_ioreq(_t) (p2m_to_mask(_t) & P2M_IOREQ_TYPES)
#define p2m_is_pod(_t) (p2m_to_mask(_t) & P2M_POD_TYPES)
#define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES)
/* Grant types are *not* considered valid, because they can be
@@ -178,6 +182,8 @@ typedef unsigned int p2m_query_t;
#define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES)
+#define p2m_check_changeable(t) (p2m_is_changeable(t) && !p2m_is_ioreq(t))
+
typedef enum {
p2m_host,
p2m_nested,
@@ -349,6 +355,7 @@ struct p2m_domain {
* are to be emulated by an ioreq server.
*/
unsigned int flags;
+ long entry_count;
} ioreq;
};
After an ioreq server has unmapped, the remaining p2m_ioreq_server
entries need to be reset back to p2m_ram_rw. This patch does this
asynchronously, using the current p2m_change_entry_type_global()
interface.

This patch also disallows live migration while there are still
outstanding p2m_ioreq_server entries. The core reason is that the
current implementation of p2m_change_entry_type_global() cannot tell
the state of a p2m_ioreq_server entry (it cannot decide whether an
entry is still to be emulated or is to be resynced).

Note: a new field, entry_count, is introduced in struct p2m_domain to
record the number of p2m_ioreq_server p2m page table entries. These
entries only ever point to 4K page frames, because all
p2m_ioreq_server entries originate from p2m_ram_rw ones in
p2m_change_type_one(), so there is no need to account for 2M/1G pages.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
Cc: Paul Durrant <paul.durrant@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>

changes in v4:
  - According to comments from Jan: use ASSERT() instead of an 'if'
    condition in p2m_change_type_one().
  - According to comments from Jan: commit message changes to mention
    that p2m_ioreq_server entries are all based on 4K pages.

changes in v3:
  - Move the synchronous resetting logic into patch 5.
  - According to comments from Jan: introduce p2m_check_changeable()
    to clarify the p2m type change code.
  - According to comments from George: take locks in the same order
    to avoid deadlock, and call p2m_change_entry_type_global() after
    the unmap of the ioreq server is finished.

changes in v2:
  - Move the calculation of the ioreq server page entry_count into
    p2m_change_type_one() so that we do not need a separate lock.
    Note: entry_count is also updated in resolve_misconfig()/
    do_recalc(); fortunately the callers of both routines already
    hold the p2m lock.
  - Simplify the logic in hvmop_set_mem_type().
  - Introduce routine p2m_finish_type_change() to walk the p2m table
    and do the p2m reset.
---
 xen/arch/x86/hvm/ioreq.c  |  8 ++++++++
 xen/arch/x86/mm/hap/hap.c |  9 +++++++++
 xen/arch/x86/mm/p2m-ept.c |  8 +++++++-
 xen/arch/x86/mm/p2m-pt.c  | 13 +++++++++++--
 xen/arch/x86/mm/p2m.c     | 20 ++++++++++++++++++++
 xen/include/asm-x86/p2m.h |  9 ++++++++-
 6 files changed, 63 insertions(+), 4 deletions(-)
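As an illustration of how the new entry_count is intended to behave,
here is a minimal, self-contained user-space sketch in C. The toy_*
names and the reset_pending flag are hypothetical and only approximate
the real paths in p2m_change_type_one(), resolve_misconfig()/do_recalc(),
hap_enable_log_dirty() and hvm_map_mem_type_to_ioreq_server(); in the
hypervisor the decrement happens lazily, when a recalculated entry is
resolved, rather than at unmap time:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

enum toy_p2m_type { toy_ram_rw, toy_ram_logdirty, toy_ioreq_server };

struct toy_p2m {
    long entry_count;   /* outstanding p2m_ioreq_server entries */
    bool reset_pending; /* lazy global reset to ram_rw requested */
};

/* Models the bookkeeping added to p2m_change_type_one(). */
static void toy_change_type_one(struct toy_p2m *p2m,
                                enum toy_p2m_type ot, enum toy_p2m_type nt)
{
    if ( nt == toy_ioreq_server )
    {
        assert(ot == toy_ram_rw); /* only 4K ram_rw pages are converted */
        p2m->entry_count++;
    }
    else if ( ot == toy_ioreq_server && nt == toy_ram_rw )
    {
        p2m->entry_count--;
        assert(p2m->entry_count >= 0);
    }
}

/* Models the check added to hap_enable_log_dirty(). */
static int toy_enable_log_dirty(const struct toy_p2m *p2m)
{
    return p2m->entry_count ? -1 /* -EBUSY */ : 0;
}

/* Models the unmap path in hvm_map_mem_type_to_ioreq_server(). */
static void toy_unmap_ioreq_server(struct toy_p2m *p2m)
{
    if ( p2m->entry_count )
        p2m->reset_pending = true; /* p2m_change_entry_type_global() */
}

int main(void)
{
    struct toy_p2m p2m = { 0, false };

    toy_change_type_one(&p2m, toy_ram_rw, toy_ioreq_server);
    assert(toy_enable_log_dirty(&p2m) != 0);  /* log-dirty refused */

    toy_unmap_ioreq_server(&p2m);             /* async reset scheduled */
    toy_change_type_one(&p2m, toy_ioreq_server, toy_ram_rw);
    assert(toy_enable_log_dirty(&p2m) == 0);  /* log-dirty allowed again */

    printf("entry_count=%ld reset_pending=%d\n",
           p2m.entry_count, p2m.reset_pending);
    return 0;
}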