@@ -173,9 +173,14 @@ static int modified_memory(struct domain *d,
static bool allow_p2m_type_change(p2m_type_t old, p2m_type_t new)
{
+ if ( new == p2m_ioreq_server )
+ return old == p2m_ram_rw; /* Only ram_rw may become ioreq_server. */
+
+ if ( old == p2m_ioreq_server )
+ return new == p2m_ram_rw; /* ioreq_server reverts to ram_rw only. */
+
return p2m_is_ram(old) ||
- (p2m_is_hole(old) && new == p2m_mmio_dm) ||
- (old == p2m_ioreq_server && new == p2m_ram_rw);
+ (p2m_is_hole(old) && new == p2m_mmio_dm);
}
static int set_mem_type(struct domain *d,
@@ -202,6 +207,18 @@ static int set_mem_type(struct domain *d,
unlikely(data->mem_type == HVMMEM_unused) )
return -EINVAL;
+ if ( data->mem_type == HVMMEM_ioreq_server )
+ {
+ unsigned int flags;
+
+ if ( !hap_enabled(d) )
+ return -EOPNOTSUPP;
+
+ /* Don't change to HVMMEM_ioreq_server if no ioreq server is mapped. */
+ if ( !p2m_get_ioreq_server(d, &flags) )
+ return -EINVAL;
+ }
+
while ( iter < data->nr )
{
unsigned long pfn = data->first_pfn + iter;
@@ -365,6 +382,20 @@ static int dm_op(domid_t domid,
break;
}
+ case XEN_DMOP_map_mem_type_to_ioreq_server:
+ {
+ const struct xen_dm_op_map_mem_type_to_ioreq_server *data =
+ &op.u.map_mem_type_to_ioreq_server;
+
+ rc = -EOPNOTSUPP;
+ if ( !hap_enabled(d) )
+ break;
+
+ rc = hvm_map_mem_type_to_ioreq_server(d, data->id,
+ data->type, data->flags);
+ break;
+ }
+
case XEN_DMOP_set_ioreq_server_state:
{
const struct xen_dm_op_set_ioreq_server_state *data =
@@ -100,6 +100,7 @@ static int hvmemul_do_io(
uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
{
struct vcpu *curr = current;
+ struct domain *currd = curr->domain;
struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
ioreq_t p = {
.type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
@@ -141,7 +142,7 @@ static int hvmemul_do_io(
(p.dir != dir) ||
(p.df != df) ||
(p.data_is_ptr != data_is_addr) )
- domain_crash(curr->domain);
+ domain_crash(currd);
if ( data_is_addr )
return X86EMUL_UNHANDLEABLE;
@@ -178,8 +179,60 @@ static int hvmemul_do_io(
break;
case X86EMUL_UNHANDLEABLE:
{
- struct hvm_ioreq_server *s =
- hvm_select_ioreq_server(curr->domain, &p);
+ /*
+ * Xen isn't emulating the instruction internally, so see if there's
+ * an ioreq server that can handle it.
+ *
+ * Rules:
+ * A> PIO or MMIO accesses run through hvm_select_ioreq_server() to
+ * choose the ioreq server by range. If no server is found, the access
+ * is ignored.
+ *
+ * B> p2m_ioreq_server accesses are handled by the designated
+ * ioreq server for the domain, but there are some corner cases:
+ *
+ * - If the domain ioreq server is NULL, it's likely we suffer from
+ * a race with an unmap operation on the ioreq server, so re-try the
+ * instruction.
+ *
+ * Note: Even when an ioreq server is found, its value could become
+ * stale later, because it is possible that
+ *
+ * - the PIO or MMIO address is removed from the rangeset of the
+ * ioreq server, before the event is delivered to the device model.
+ *
+ * - the p2m_ioreq_server type is unmapped from the ioreq server,
+ * before the event is delivered to the device model.
+ *
+ * However, there's no cheap way to avoid the above situations in Xen,
+ * so the device model side needs to check the incoming ioreq event.
+ */
+ struct hvm_ioreq_server *s = NULL;
+ p2m_type_t p2mt = p2m_invalid;
+
+ if ( is_mmio )
+ {
+ unsigned long gmfn = paddr_to_pfn(addr);
+
+ get_gfn_query_unlocked(currd, gmfn, &p2mt);
+
+ if ( p2mt == p2m_ioreq_server )
+ {
+ unsigned int flags;
+
+ s = p2m_get_ioreq_server(currd, &flags);
+
+ if ( s == NULL )
+ {
+ rc = X86EMUL_RETRY;
+ vio->io_req.state = STATE_IOREQ_NONE;
+ break;
+ }
+ }
+ }
+
+ if ( !s )
+ s = hvm_select_ioreq_server(currd, &p);
/* If there is no suitable backing DM, just ignore accesses */
if ( !s )
@@ -190,7 +243,7 @@ static int hvmemul_do_io(
else
{
rc = hvm_send_ioreq(s, &p, 0);
- if ( rc != X86EMUL_RETRY || curr->domain->is_shutting_down )
+ if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
vio->io_req.state = STATE_IOREQ_NONE;
else if ( data_is_addr )
rc = X86EMUL_OKAY;
@@ -753,6 +753,8 @@ int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
domain_pause(d);
+ p2m_set_ioreq_server(d, 0, s);
+
hvm_ioreq_server_disable(s, 0);
list_del(&s->list_entry);
@@ -914,6 +916,48 @@ int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
return rc;
}
+/*
+ * Map an ioreq server to a memory type, or unmap it by passing flags == 0.
+ * Only HVMMEM_ioreq_server is supported for now (-EINVAL otherwise); new
+ * types may follow, e.g. HVMMEM_ioreq_serverX mapped to ioreq server X.
+ * Only write accesses can currently be forwarded, so any other flag bit is
+ * rejected with -EINVAL; read emulation can be added once an ioreq server
+ * needs it. -ENOENT is returned if no non-default server has the given id.
+ */
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+ uint32_t type, uint32_t flags)
+{
+ struct hvm_ioreq_server *s;
+ int rc;
+
+ if ( type != HVMMEM_ioreq_server )
+ return -EINVAL;
+
+ if ( flags & ~XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
+ return -EINVAL;
+
+ spin_lock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+ rc = -ENOENT;
+ list_for_each_entry ( s,
+ &d->arch.hvm_domain.ioreq_server.list,
+ list_entry )
+ {
+ if ( s == d->arch.hvm_domain.default_ioreq_server )
+ continue;
+
+ if ( s->id == id )
+ {
+ rc = p2m_set_ioreq_server(d, flags, s);
+ break;
+ }
+ }
+
+ spin_unlock_recursive(&d->arch.hvm_domain.ioreq_server.lock);
+
+ return rc;
+}
+
int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
bool_t enabled)
{
@@ -131,6 +131,13 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
entry->r = entry->w = entry->x = 1;
entry->a = entry->d = !!cpu_has_vmx_ept_ad;
break;
+ case p2m_ioreq_server:
+ entry->r = 1;
+ entry->w = !(p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE);
+ entry->x = 0;
+ entry->a = !!cpu_has_vmx_ept_ad;
+ entry->d = entry->w && entry->a;
+ break;
case p2m_mmio_direct:
entry->r = entry->x = 1;
entry->w = !rangeset_contains_singleton(mmio_ro_ranges,
@@ -170,7 +177,6 @@ static void ept_p2m_type_to_flags(struct p2m_domain *p2m, ept_entry_t *entry,
entry->a = entry->d = !!cpu_has_vmx_ept_ad;
break;
case p2m_grant_map_ro:
- case p2m_ioreq_server:
entry->r = 1;
entry->w = entry->x = 0;
entry->a = !!cpu_has_vmx_ept_ad;
@@ -70,7 +70,9 @@ static const unsigned long pgt[] = {
PGT_l3_page_table
};
-static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
+static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
+ p2m_type_t t,
+ mfn_t mfn,
unsigned int level)
{
unsigned long flags;
@@ -92,8 +94,12 @@ static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn,
default:
return flags | _PAGE_NX_BIT;
case p2m_grant_map_ro:
- case p2m_ioreq_server:
return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
+ case p2m_ioreq_server:
+ flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
+ if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
+ return flags & ~_PAGE_RW;
+ return flags;
case p2m_ram_ro:
case p2m_ram_logdirty:
case p2m_ram_shared:
@@ -440,7 +446,8 @@ static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
p2m_type_t p2mt = p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask)
? p2m_ram_logdirty : p2m_ram_rw;
unsigned long mfn = l1e_get_pfn(e);
- unsigned long flags = p2m_type_to_flags(p2mt, _mfn(mfn), level);
+ unsigned long flags = p2m_type_to_flags(p2m, p2mt,
+ _mfn(mfn), level);
if ( level )
{
@@ -578,7 +585,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
? l3e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 2) | _PAGE_PSE)
+ p2m_type_to_flags(p2m, p2mt, mfn, 2) | _PAGE_PSE)
: l3e_empty();
entry_content.l1 = l3e_content.l3;
@@ -615,7 +622,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 0));
+ p2m_type_to_flags(p2m, p2mt, mfn, 0));
else
entry_content = l1e_empty();
@@ -652,7 +659,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
l2e_content = l2e_from_pfn(mfn_x(mfn),
- p2m_type_to_flags(p2mt, mfn, 1) |
+ p2m_type_to_flags(p2m, p2mt, mfn, 1) |
_PAGE_PSE);
else
l2e_content = l2e_empty();
@@ -82,6 +82,8 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
else
p2m_pt_init(p2m);
+ spin_lock_init(&p2m->ioreq.lock);
+
return ret;
}
@@ -286,6 +288,62 @@ void p2m_memory_type_changed(struct domain *d)
}
}
+int p2m_set_ioreq_server(struct domain *d,
+ unsigned int flags,
+ struct hvm_ioreq_server *s)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ int rc;
+
+ /*
+ * Serialise map/unmap attempts from multiple ioreq servers: only the
+ * current owner may unmap (-EINVAL), only one server may map (-EBUSY).
+ */
+ spin_lock(&p2m->ioreq.lock);
+
+ /* A flags value of 0 unmaps the ioreq server from the p2m type. */
+ if ( flags == 0 )
+ {
+ rc = -EINVAL;
+ if ( p2m->ioreq.server != s )
+ goto out;
+
+ p2m->ioreq.server = NULL;
+ p2m->ioreq.flags = 0;
+ }
+ else
+ {
+ rc = -EBUSY;
+ if ( p2m->ioreq.server != NULL )
+ goto out;
+
+ p2m->ioreq.server = s;
+ p2m->ioreq.flags = flags;
+ }
+
+ rc = 0;
+
+ out:
+ spin_unlock(&p2m->ioreq.lock);
+
+ return rc;
+}
+
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+ unsigned int *flags)
+{
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ struct hvm_ioreq_server *s;
+
+ spin_lock(&p2m->ioreq.lock); /* Read server and flags as one pair. */
+
+ s = p2m->ioreq.server;
+ *flags = p2m->ioreq.flags; /* Always written, even when s is NULL. */
+
+ spin_unlock(&p2m->ioreq.lock);
+ return s;
+}
+
void p2m_enable_hardware_log_dirty(struct domain *d)
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
@@ -3306,8 +3306,7 @@ static int sh_page_fault(struct vcpu *v,
}
/* Need to hand off device-model MMIO to the device model */
- if ( p2mt == p2m_mmio_dm
- || (p2mt == p2m_ioreq_server && ft == ft_demand_write) )
+ if ( p2mt == p2m_mmio_dm )
{
gpa = guest_walk_to_gpa(&gw);
goto mmio;
@@ -37,6 +37,8 @@ int hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id,
int hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t id,
uint32_t type, uint64_t start,
uint64_t end);
+int hvm_map_mem_type_to_ioreq_server(struct domain *d, ioservid_t id,
+ uint32_t type, uint32_t flags);
int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
bool_t enabled);
@@ -89,7 +89,8 @@ typedef unsigned int p2m_query_t;
| p2m_to_mask(p2m_ram_paging_out) \
| p2m_to_mask(p2m_ram_paged) \
| p2m_to_mask(p2m_ram_paging_in) \
- | p2m_to_mask(p2m_ram_shared))
+ | p2m_to_mask(p2m_ram_shared) \
+ | p2m_to_mask(p2m_ioreq_server))
/* Types that represent a physmap hole that is ok to replace with a shared
* entry */
@@ -111,8 +112,7 @@ typedef unsigned int p2m_query_t;
#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \
| p2m_to_mask(p2m_ram_ro) \
| p2m_to_mask(p2m_grant_map_ro) \
- | p2m_to_mask(p2m_ram_shared) \
- | p2m_to_mask(p2m_ioreq_server))
+ | p2m_to_mask(p2m_ram_shared))
/* Write-discard types, which should discard the write operations */
#define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro) \
@@ -336,6 +336,20 @@ struct p2m_domain {
struct ept_data ept;
/* NPT-equivalent structure could be added here. */
};
+
+ struct {
+ spinlock_t lock;
+ /*
+ * The ioreq server which is responsible for the emulation of
+ * gfns with a specific p2m type (for now, p2m_ioreq_server).
+ */
+ struct hvm_ioreq_server *server;
+ /*
+ * flags specifies whether read, write or both operations
+ * are to be emulated by an ioreq server.
+ */
+ unsigned int flags;
+ } ioreq;
};
/* get host p2m table */
@@ -827,6 +841,11 @@ static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
return flags;
}
+int p2m_set_ioreq_server(struct domain *d, unsigned int flags,
+ struct hvm_ioreq_server *s);
+struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
+ unsigned int *flags);
+
#endif /* _XEN_ASM_X86_P2M_H */
/*
@@ -318,6 +318,32 @@ struct xen_dm_op_inject_msi {
uint64_aligned_t addr;
};
+/*
+ * XEN_DMOP_map_mem_type_to_ioreq_server : map or unmap the IOREQ Server <id>
+ * to specific memory type <type>
+ * for specific accesses <flags>
+ *
+ * For now, flags only accepts XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (or 0 to unmap),
+ * which means only write operations are to be forwarded to an ioreq server.
+ * Support for the emulation of read operations can be added when an ioreq
+ * server has such a requirement in the future.
+ */
+#define XEN_DMOP_map_mem_type_to_ioreq_server 15
+
+struct xen_dm_op_map_mem_type_to_ioreq_server {
+ ioservid_t id; /* IN - ioreq server id */
+ uint16_t type; /* IN - memory type */
+ uint32_t flags; /* IN - types of accesses to be forwarded to the
+ ioreq server; a value of 0 unmaps the
+ ioreq server */
+
+#define XEN_DMOP_IOREQ_MEM_ACCESS_READ (1u << 0)
+#define XEN_DMOP_IOREQ_MEM_ACCESS_WRITE (1u << 1)
+
+ uint64_t opaque; /* IN/OUT - only used for hypercall continuation,
+ has to be set to zero by the caller */
+};
+
struct xen_dm_op {
uint32_t op;
uint32_t pad;
@@ -336,6 +362,8 @@ struct xen_dm_op {
struct xen_dm_op_set_mem_type set_mem_type;
struct xen_dm_op_inject_event inject_event;
struct xen_dm_op_inject_msi inject_msi;
+ struct xen_dm_op_map_mem_type_to_ioreq_server
+ map_mem_type_to_ioreq_server;
} u;
};
@@ -93,7 +93,13 @@ typedef enum {
HVMMEM_unused, /* Placeholder; setting memory to this type
will fail for code after 4.7.0 */
#endif
- HVMMEM_ioreq_server
+ HVMMEM_ioreq_server /* Memory type claimed by an ioreq server; type
+ changes to this value are only allowed after
+ an ioreq server has claimed its ownership.
+ Only pages with HVMMEM_ram_rw are allowed to
+ change to this type; conversely, pages with
+ this type are only allowed to be changed back
+ to HVMMEM_ram_rw. */
} hvmmem_type_t;
/* Hint from PV drivers for pagetable destruction. */