@@ -173,9 +173,14 @@ static int modified_memory(struct domain *d,
static bool allow_p2m_type_change(p2m_type_t old, p2m_type_t new)
{
+ if ( new == p2m_ioreq_server )
+ return old == p2m_ram_rw;
+
+ if ( old == p2m_ioreq_server )
+ return new == p2m_ram_rw;
+
return p2m_is_ram(old) ||
- (p2m_is_hole(old) && new == p2m_mmio_dm) ||
- (old == p2m_ioreq_server && new == p2m_ram_rw);
+ (p2m_is_hole(old) && new == p2m_mmio_dm);
}
static int set_mem_type(struct domain *d,
@@ -202,6 +207,19 @@ static int set_mem_type(struct domain *d,
unlikely(data->mem_type == HVMMEM_unused) )
return -EINVAL;
+ if ( data->mem_type == HVMMEM_ioreq_server )
+ {
+ unsigned int flags;
+
+ /* HVMMEM_ioreq_server is only supported for HAP-enabled HVM domains. */
+ if ( !hap_enabled(d) )
+ return -EOPNOTSUPP;
+
+ /* Do not change to HVMMEM_ioreq_server if no ioreq server is mapped. */
+ if ( !p2m_get_ioreq_server(d, &flags) )
+ return -EINVAL;
+ }
+
while ( iter < data->nr )
{
unsigned long pfn = data->first_pfn + iter;
@@ -365,6 +383,21 @@ static int dm_op(domid_t domid,
break;
}
+ case XEN_DMOP_map_mem_type_to_ioreq_server:
+ {
+ const struct xen_dm_op_map_mem_type_to_ioreq_server *data =
+ &op.u.map_mem_type_to_ioreq_server;
+
+ rc = -EOPNOTSUPP;
+ /* Only supported for HAP-enabled HVM domains. */
+ if ( !hap_enabled(d) )
+ break;
+
+ rc = hvm_map_mem_type_to_ioreq_server(d, data->id,
+ data->type, data->flags);
+ break;
+ }
+
case XEN_DMOP_set_ioreq_server_state:
{
const struct xen_dm_op_set_ioreq_server_state *data =
@@ -99,6 +99,7 @@ static int hvmemul_do_io(
uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
{
struct vcpu *curr = current;
+ struct domain *currd = curr->domain;
struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
ioreq_t p = {
.type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
@@ -140,7 +141,7 @@ static int hvmemul_do_io(
(p.dir != dir) ||
(p.df != df) ||
(p.data_is_ptr != data_is_addr) )
- domain_crash(curr->domain);
+ domain_crash(currd);
if ( data_is_addr )
return X86EMUL_UNHANDLEABLE;
@@ -177,8 +178,64 @@ static int hvmemul_do_io(
break;
case X86EMUL_UNHANDLEABLE:
{
- struct hvm_ioreq_server *s =
- hvm_select_ioreq_server(curr->domain, &p);
+ /*
+ * Xen isn't emulating the instruction internally, so see if
+ * there's an ioreq server that can handle it. Rules:
+ *
+ * - PIO and "normal" MMIO run through hvm_select_ioreq_server()
+ * to choose the ioreq server by range. If no server is found,
+ * the access is ignored.
+ *
+ * - p2m_ioreq_server accesses are handled by the designated
+ * ioreq_server for the domain, but there are some corner
+ * cases:
+ *
+ * - If the domain's ioreq server is NULL, assume there is a
+ * race between the unbinding of the ioreq server and the
+ * guest fault, so retry the instruction.
+ */
+ struct hvm_ioreq_server *s = NULL;
+ p2m_type_t p2mt = p2m_invalid;
+
+ if ( is_mmio )
+ {
+ unsigned long gmfn = paddr_to_pfn(addr);
+
+ get_gfn_query_unlocked(currd, gmfn, &p2mt);
+
+ if ( p2mt == p2m_ioreq_server )
+ {
+ unsigned int flags;
+
+ /*
+ * The value of s could be stale if we lost a race
+ * with a dm_op that unmaps p2m_ioreq_server from the
+ * ioreq server. There is no cheap way to avoid
+ * this, so the device model needs to do the check.
+ */
+ s = p2m_get_ioreq_server(currd, &flags);
+
+ /*
+ * If p2mt is p2m_ioreq_server but the ioreq server is
+ * NULL, we probably lost a race with the unbinding of
+ * the ioreq server; just retry the access.
+ */
+ if ( s == NULL )
+ {
+ rc = X86EMUL_RETRY;
+ vio->io_req.state = STATE_IOREQ_NONE;
+ break;
+ }
+ }
+ }
+
+ /*
+ * The value of s could be stale if we lost a race with a dm_op
+ * that unmaps this PIO/MMIO address from the ioreq server.
+ * The device model side needs to do the check.
+ */
+ if ( !s )
+ s = hvm_select_ioreq_server(currd, &p);
/* If there is no suitable backing DM, just ignore accesses */
if ( !s )
@@ -189,7 +246,7 @@ static int hvmemul_do_io(
else
{
rc = hvm_send_ioreq(s, &p, 0);
- if ( rc != X86EMUL_RETRY || curr->domain->is_shutting_down )
+ if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
vio->io_req.state = STATE_IOREQ_NONE;
else if ( data_is_addr )
rc = X86EMUL_OKAY;
@@ -753,6 +753,8 @@ int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
domain_pause(d);
+ p2m_destroy_ioreq_server(d, s);
+
hvm_ioreq_server_disable(s, 0);
list_del(&s->list_entry);
@@ -914,6 +916,42 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
return rc;
}
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+ uint32_t type, uint32_t flags)
+{
+ struct hvm_ioreq_server *s;
+ int rc;
+
+ /* For now, only HVMMEM_ioreq_server is supported. */
+ if ( type != HVMMEM_ioreq_server )
+ return -EINVAL;
+
+ /* For now, only write emulation is supported. */
+ if ( flags & ~(XEN_DMOP_IOREQ_MEM_ACCESS_WRITE) )
+ return -EINVAL;
+
+ spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+ rc = -ENOENT;
+ list_for_each_entry ( s,
+ &d->arch.hvm_domain.ioreq_server.list,
+ list_entry )
+ {
+ if ( s == d->arch.hvm_domain.default_ioreq_server )
+ continue;
+
+ if ( s->id == id )
+ {
+ rc = p2m_set_ioreq_server(d, flags, s);
+ break;
+ }
+ }
+
+ spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+ return rc;
+}
+
int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
bool_t enabled)
{
@@ -172,7 +172,7 @@ nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa,
if ( *p2mt == p2m_mmio_direct )
goto direct_mmio_out;
rc = NESTEDHVM_PAGEFAULT_MMIO;
- if ( *p2mt == p2m_mmio_dm )
+ if ( *p2mt == p2m_mmio_dm || *p2mt == p2m_ioreq_server )
goto out;
rc = NESTEDHVM_PAGEFAULT_L0_ERROR;
@@ -131,6 +131,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
entry->r = entry->w = entry->x = 1;
entry->a = entry->d = !!cpu_has_vmx_ept_ad;
break;
+ case p2m_ioreq_server:
+ entry->r = 1;
+ entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
+ entry->x = 0;
+ entry->a = !!cpu_has_vmx_ept_ad;
+ entry->d = entry->w && entry->a;
+ break;
case p2m_mmio_direct:
entry->r = entry->x = 1;
entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
@@ -170,7 +177,6 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
entry->a = entry->d = !!cpu_has_vmx_ept_ad;
break;
case p2m_grant_map_ro:
- case p2m_ioreq_server:
entry->r = 1;
entry->w = entry->x = 0;
entry->a = !!cpu_has_vmx_ept_ad;
@@ -70,7 +70,9 @@ static const unsigned long pgt[] = {
PGT_l3_page_table
};
-static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
+static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
+ p2m_type_t t,
+ mfn_t mfn,
unsigned int level)
{
unsigned long flags;
@@ -92,8 +94,12 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
default:
return flags | _PAGE_NX_BIT;
case p2m_grant_map_ro:
- case p2m_ioreq_server:
return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
+ case p2m_ioreq_server:
+ flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
+ if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
+ return flags & ~_PAGE_RW;
+ return flags;
case p2m_ram_ro:
case p2m_ram_logdirty:
case p2m_ram_shared:
@@ -440,7 +446,8 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
? p2m_ram_logdirty : p2m_ram_rw;
unsigned long mfn = l1e_get_pfn(e);
- unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn), level);
+ unsigned long flags = p2m_type_to_flags(p2m, p2mt,
+ _mfn(mfn), level);
if ( level )
{
@@ -578,7 +585,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
? l3e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE)
+ p2m_type_to_flags(p2m, p2mt, mfn, 2) | _PAGE_PSE)
: l3e_empty();
entry_content.l1 = l3e_content.l3;
@@ -615,7 +622,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 0));
+ p2m_type_to_flags(p2m, p2mt, mfn, 0));
else
entry_content = l1e_empty();
@@ -652,7 +659,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
l2e_content = l2e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 1) |
+ p2m_type_to_flags(p2m, p2mt, mfn, 1) |
_PAGE_PSE);
else
l2e_content = l2e_empty();
@@ -82,6 +82,8 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
else
p2m_pt_init(p2m);
+ spin_lock_init(&p2m->ioreq.lock);
+
return ret;
}
@@ -286,6 +288,78 @@ void p2m_memory_type_changed(struct domain *d)
}
}
+int p2m_set_ioreq_server(struct domain *d,
+ unsigned int flags,
+ struct hvm_ioreq_server *s)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ int rc;
+
+ /*
+ * Use the lock to prevent concurrent setting attempts
+ * from multiple ioreq servers.
+ */
+ spin_lock(&p2m->ioreq.lock);
+
+ /* Passing flags == 0 unmaps the ioreq server from the p2m type. */
+ if ( flags == 0 )
+ {
+ rc = -EINVAL;
+ if ( p2m->ioreq.server != s )
+ goto out;
+
+ p2m->ioreq.server = NULL;
+ p2m->ioreq.flags = 0;
+ }
+ else
+ {
+ rc = -EBUSY;
+ if ( p2m->ioreq.server != NULL )
+ goto out;
+
+ p2m->ioreq.server = s;
+ p2m->ioreq.flags = flags;
+ }
+
+ rc = 0;
+
+ out:
+ spin_unlock(&p2m->ioreq.lock);
+
+ return rc;
+}
+
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+ unsigned int *flags)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ struct hvm_ioreq_server *s;
+
+ spin_lock(&p2m->ioreq.lock);
+
+ s = p2m->ioreq.server;
+ *flags = p2m->ioreq.flags;
+
+ spin_unlock(&p2m->ioreq.lock);
+ return s;
+}
+
+void p2m_destroy_ioreq_server(const struct domain *d,
+ const struct hvm_ioreq_server *s)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ spin_lock(&p2m->ioreq.lock);
+
+ if ( p2m->ioreq.server == s )
+ {
+ p2m->ioreq.server = NULL;
+ p2m->ioreq.flags = 0;
+ }
+
+ spin_unlock(&p2m->ioreq.lock);
+}
+
void p2m_enable_hardware_log_dirty(struct domain *d)
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
@@ -3269,8 +3269,7 @@ static int sh_page_fault(struct vcpu *v,
}
/* Need to hand off device-model MMIO to the device model */
- if ( p2mt == p2m_mmio_dm
- || (p2mt == p2m_ioreq_server && ft == ft_demand_write) )
+ if ( p2mt == p2m_mmio_dm )
{
gpa = guest_walk_to_gpa(&gw);
goto mmio;
@@ -37,6 +37,8 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
uint32_t type, uint64_t start,
uint64_t end);
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+ uint32_t type, uint32_t flags);
int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
bool_t enabled);
@@ -89,7 +89,8 @@ typedef unsigned int p2m_query_t;
| p2m_to_mask(p2m_ram_paging_out) \
| p2m_to_mask(p2m_ram_paged) \
| p2m_to_mask(p2m_ram_paging_in) \
- | p2m_to_mask(p2m_ram_shared))
+ | p2m_to_mask(p2m_ram_shared) \
+ | p2m_to_mask(p2m_ioreq_server))
/* Types that represent a physmap hole that is ok to replace with a shared
* entry */
@@ -111,8 +112,7 @@ typedef unsigned int p2m_query_t;
#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \
| p2m_to_mask(p2m_ram_ro) \
| p2m_to_mask(p2m_grant_map_ro) \
- | p2m_to_mask(p2m_ram_shared) \
- | p2m_to_mask(p2m_ioreq_server))
+ | p2m_to_mask(p2m_ram_shared))
/* Write-discard types, which should discard the write operations */
#define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro) \
@@ -336,6 +336,20 @@ struct p2m_domain {
struct ept_data ept;
/* NPT-equivalent structure could be added here. */
};
+
+ struct {
+ spinlock_t lock;
+ /*
+ * The ioreq server that is responsible for the emulation of
+ * gfns with a specific p2m type (for now, p2m_ioreq_server).
+ */
+ struct hvm_ioreq_server *server;
+ /*
+ * Flags specifying whether read, write or both operations
+ * are to be emulated by the ioreq server.
+ */
+ unsigned int flags;
+ } ioreq;
};
/* get host p2m table */
@@ -827,6 +841,12 @@ static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
return flags;
}
+int p2m_set_ioreq_server(struct domain *d, unsigned int flags,
+ struct hvm_ioreq_server *s);
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+ unsigned int *flags);
+void p2m_destroy_ioreq_server(const struct domain *d, const struct hvm_ioreq_server *s);
+
#endif /* _XEN_ASM_X86_P2M_H */
/*
@@ -318,6 +318,32 @@ struct xen_dm_op_inject_msi {
uint64_aligned_t addr;
};
+/*
+ * XEN_DMOP_map_mem_type_to_ioreq_server : map or unmap ioreq server <id>
+ * to a specific memory type <type>
+ * for specific accesses <flags>
+ *
+ * For now, flags only accepts XEN_DMOP_IOREQ_MEM_ACCESS_WRITE,
+ * which means only write operations are to be forwarded to the ioreq server.
+ * Support for the emulation of read operations can be added when an ioreq
+ * server has such a requirement in the future.
+ */
+#define XEN_DMOP_map_mem_type_to_ioreq_server 15
+
+struct xen_dm_op_map_mem_type_to_ioreq_server {
+ ioservid_t id; /* IN - ioreq server id */
+ uint16_t type; /* IN - memory type */
+ uint32_t flags; /* IN - types of accesses to be forwarded to the
+ ioreq server. A value of 0 unmaps the
+ ioreq server. */
+
+#define XEN_DMOP_IOREQ_MEM_ACCESS_READ (1u << 0)
+#define XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1)
+
+ uint64_t opaque; /* IN/OUT - only used for hypercall continuation,
+ has to be set to zero by the caller */
+};
+
struct xen_dm_op {
uint32_t op;
uint32_t pad;
@@ -336,6 +362,8 @@ struct xen_dm_op {
struct xen_dm_op_set_mem_type set_mem_type;
struct xen_dm_op_inject_event inject_event;
struct xen_dm_op_inject_msi inject_msi;
+ struct xen_dm_op_map_mem_type_to_ioreq_server
+ map_mem_type_to_ioreq_server;
} u;
};
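
As a usage illustration (not part of the patch): a device model would normally reach this DMOP through libxendevicemodel rather than issuing the hypercall by hand. The sketch below assumes the xendevicemodel_open()/xendevicemodel_map_mem_type_to_ioreq_server() wrappers that accompany this interface, plus an ioreq server id obtained earlier via xendevicemodel_create_ioreq_server(); treat the exact signatures and headers as illustrative.

#include <xendevicemodel.h>   /* pulls in the Xen public headers that define
                               * HVMMEM_ioreq_server and the XEN_DMOP_* flags
                               * in a tools (__XEN_TOOLS__) build */

/* Claim write accesses to p2m_ioreq_server pages of domain 'domid' for
 * ioreq server 'id'; calling the same wrapper with flags == 0 releases
 * the claim again (see p2m_set_ioreq_server() above). */
static int claim_ioreq_mem_type(domid_t domid, ioservid_t id)
{
    xendevicemodel_handle *dmod = xendevicemodel_open(NULL, 0);
    int rc;

    if ( !dmod )
        return -1;

    rc = xendevicemodel_map_mem_type_to_ioreq_server(
        dmod, domid, id, HVMMEM_ioreq_server,
        XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);

    xendevicemodel_close(dmod);
    return rc;
}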
@@ -93,7 +93,13 @@ typedef enum {
HVMMEM_unused, /* Placeholder; setting memory to this type
will fail for code after 4.7.0 */
#endif
- HVMMEM_ioreq_server
+ HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type
+ changes to this value are only allowed after
+ an ioreq server has claimed ownership of it.
+ Only pages of type HVMMEM_ram_rw may be
+ changed to this type; conversely, pages of
+ this type may only be changed back to
+ HVMMEM_ram_rw. */
} hvmmem_type_t;
/* Hint from PV drivers for pagetable destruction. */
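
To illustrate the type-change constraints documented above: once an ioreq server has claimed HVMMEM_ioreq_server, the device model flips ordinary RAM pages into and out of that type with XEN_DMOP_set_mem_type. A rough sketch, again assuming the libxendevicemodel wrapper xendevicemodel_set_mem_type() and an already-open handle; the helper name is made up for illustration.

/* 'dmod' is an open xendevicemodel handle; the gfn range is assumed to be
 * plain HVMMEM_ram_rw beforehand, as required by allow_p2m_type_change(). */
static int track_then_release(xendevicemodel_handle *dmod, domid_t domid,
                              uint64_t first_gfn, uint32_t nr)
{
    /* HVMMEM_ram_rw -> HVMMEM_ioreq_server: guest writes to these gfns are
     * now forwarded to the claiming ioreq server. */
    int rc = xendevicemodel_set_mem_type(dmod, domid, HVMMEM_ioreq_server,
                                         first_gfn, nr);

    if ( rc )
        return rc;

    /* ... emulate the writes delivered through the ioreq ring ... */

    /* The only change permitted afterwards is back to HVMMEM_ram_rw. */
    return xendevicemodel_set_mem_type(dmod, domid, HVMMEM_ram_rw,
                                       first_gfn, nr);
}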