@@ -313,6 +313,8 @@ struct arch_domain
{
struct page_info *perdomain_l3_pg;
+ struct page_info *perdomain_l2_pgs[PERDOMAIN_SLOTS];
+
#ifdef CONFIG_PV32
unsigned int hv_compat_vstart;
#endif
@@ -633,4 +633,8 @@ static inline bool arch_mfns_in_directmap(unsigned long mfn, unsigned long nr)
/* Setup the per-domain slot in the root page table pointer. */
void setup_perdomain_slot(const struct vcpu *v, root_pgentry_t *root_pgt);
+/* Allocate a per-CPU local L3 table to use in the per-domain slot. */
+int allocate_perdomain_local_l3(unsigned int cpu);
+void free_perdomain_local_l3(unsigned int cpu);
+
#endif /* __ASM_X86_MM_H__ */
@@ -6079,6 +6079,12 @@ int create_perdomain_mapping(struct domain *d, unsigned long va,
l2tab = __map_domain_page(pg);
clear_page(l2tab);
l3tab[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR_RW);
+    /*
+     * Keep a reference to the per-domain L2 pages: they are used to
+     * populate the per-CPU L3 when one is in place (as opposed to pointing
+     * the L4 slot at perdomain_l3_pg).  Entries are only added before the
+     * domain starts running, so per-CPU copies never need to be synced.
+     */
+ ASSERT(!d->creation_finished);
+ d->arch.perdomain_l2_pgs[l3_table_offset(va)] = pg;
}
else
l2tab = map_l2t_from_l3e(l3tab[l3_table_offset(va)]);
@@ -6368,11 +6374,79 @@ unsigned long get_upper_mfn_bound(void)
return min(max_mfn, 1UL << (paddr_bits - PAGE_SHIFT)) - 1;
}
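+/* Per-CPU L3 used in the per-domain L4 slot when ASI is enabled. */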
+static DEFINE_PER_CPU(l3_pgentry_t *, local_l3);
+
+static void populate_perdomain(const struct domain *d, l4_pgentry_t *l4,
+ l3_pgentry_t *l3)
+{
+ unsigned int i;
+
+ /* Populate the per-CPU L3 with the per-domain entries. */
+ for ( i = 0; i < ARRAY_SIZE(d->arch.perdomain_l2_pgs); i++ )
+ {
+ const struct page_info *pg = d->arch.perdomain_l2_pgs[i];
+
+ BUILD_BUG_ON(ARRAY_SIZE(d->arch.perdomain_l2_pgs) >
+ L3_PAGETABLE_ENTRIES);
+ l3e_write(&l3[i], pg ? l3e_from_page(pg, __PAGE_HYPERVISOR_RW)
+ : l3e_empty());
+ }
+
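+    /* Hook the freshly populated L3 into the per-domain L4 slot. */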
+ l4e_write(&l4[l4_table_offset(PERDOMAIN_VIRT_START)],
+ l4e_from_mfn(virt_to_mfn(l3), __PAGE_HYPERVISOR_RW));
+}
+
+int allocate_perdomain_local_l3(unsigned int cpu)
+{
+ const struct domain *d = idle_vcpu[cpu]->domain;
+ l3_pgentry_t *l3;
+ root_pgentry_t *root_pgt = maddr_to_virt(idle_vcpu[cpu]->arch.cr3);
+
+ ASSERT(!per_cpu(local_l3, cpu));
+
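+    /* Nothing to allocate if no domain type can use Address Space Isolation. */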
+ if ( !opt_asi_pv && !opt_asi_hvm )
+ return 0;
+
+ l3 = alloc_xenheap_page();
+ if ( !l3 )
+ return -ENOMEM;
+
+ clear_page(l3);
+
+    /* Set up the idle domain slots (the current domain) in the L3. */
+ populate_perdomain(d, root_pgt, l3);
+
+ per_cpu(local_l3, cpu) = l3;
+
+ return 0;
+}
+
+void free_perdomain_local_l3(unsigned int cpu)
+{
+ l3_pgentry_t *l3 = per_cpu(local_l3, cpu);
+
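+    /* The L3 is only allocated when ASI may be used; nothing to do otherwise. */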
+ if ( !l3 )
+ return;
+
+ per_cpu(local_l3, cpu) = NULL;
+ free_xenheap_page(l3);
+}
+
void setup_perdomain_slot(const struct vcpu *v, root_pgentry_t *root_pgt)
{
- l4e_write(&root_pgt[root_table_offset(PERDOMAIN_VIRT_START)],
- l4e_from_page(v->domain->arch.perdomain_l3_pg,
- __PAGE_HYPERVISOR_RW));
+ const struct domain *d = v->domain;
+
+ if ( d->arch.asi )
+ {
+ l3_pgentry_t *l3 = this_cpu(local_l3);
+
+ ASSERT(l3);
+ populate_perdomain(d, root_pgt, l3);
+ }
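+    /* Without ASI, HVM and XPTI domains use the shared per-domain L3. */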
+    else if ( is_hvm_domain(d) || d->arch.pv.xpti )
+        l4e_write(&root_pgt[root_table_offset(PERDOMAIN_VIRT_START)],
+                  l4e_from_page(d->arch.perdomain_l3_pg,
+                                __PAGE_HYPERVISOR_RW));
if ( !is_pv_64bit_vcpu(v) )
/*
@@ -1961,6 +1961,14 @@ void asmlinkage __init noreturn __start_xen(unsigned long mbi_p)
alternative_branches();
+    /*
+     * Set up the local per-domain L3 for the BSP too, so that it matches
+     * the state of the APs.
+     */
+ ret = allocate_perdomain_local_l3(0);
+ if ( ret )
+ panic("Error %d setting up local per-domain L3\n", ret);
+
/*
* NB: when running as a PV shim VCPUOP_up/down is wired to the shim
* physical cpu_add/remove functions, so launch the guest with only
@@ -986,6 +986,7 @@ static void cpu_smpboot_free(unsigned int cpu, bool remove)
}
cleanup_cpu_root_pgt(cpu);
+ free_perdomain_local_l3(cpu);
if ( per_cpu(stubs.addr, cpu) )
{
@@ -1100,6 +1101,9 @@ static int cpu_smpboot_alloc(unsigned int cpu)
per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);
rc = setup_cpu_root_pgt(cpu);
+ if ( rc )
+ goto out;
+ rc = allocate_perdomain_local_l3(cpu);
if ( rc )
goto out;
rc = -ENOMEM;
So far L4 slot 260 has always been per-domain, in other words: all vCPUs
of a domain share the same L3 entry.  Currently only 3 slots are used in
that L3 table, which leaves plenty of room.

Introduce a per-CPU L3 that's used when the domain has Address Space
Isolation enabled.  Such per-CPU L3 is currently populated with the same
L3 entries present in the per-domain L3 (d->arch.perdomain_l3_pg).

No functional change expected, as the per-CPU L3 is always a copy of the
contents of d->arch.perdomain_l3_pg.

Note that all the per-domain L3 entries are populated at domain creation,
and hence there's no need to sync the state of the per-CPU L3, as the
domain won't yet be running when the L3 is modified.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/include/asm/domain.h |  2 +
 xen/arch/x86/include/asm/mm.h     |  4 ++
 xen/arch/x86/mm.c                 | 80 +++++++++++++++++++++++++++++--
 xen/arch/x86/setup.c              |  8 ++++
 xen/arch/x86/smpboot.c            |  4 ++
 5 files changed, 95 insertions(+), 3 deletions(-)
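
As an aside for reviewers, below is a minimal userspace model of the
slot-selection logic this patch introduces: a shared per-domain table
versus a per-CPU copy refreshed from the domain's per-slot page
references.  It is a sketch only: model_domain, model_populate and
model_setup_slot are hypothetical stand-ins (not Xen code), entries are
plain integers rather than pagetable entries, and the HVM/XPTI
distinction is collapsed into a single "no ASI means shared table" case.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MODEL_L3_ENTRIES 512
#define MODEL_SLOTS        3  /* mirrors PERDOMAIN_SLOTS */

struct model_domain {
    uintptr_t l2_pgs[MODEL_SLOTS];  /* stands in for perdomain_l2_pgs[] */
    uintptr_t *shared_l3;           /* stands in for perdomain_l3_pg */
    int asi;                        /* Address Space Isolation enabled? */
};

/* Stands in for the per-CPU local_l3 variable (a single CPU is modeled). */
static uintptr_t percpu_l3[MODEL_L3_ENTRIES];

/* Mirrors populate_perdomain(): copy the domain slots into an L3. */
static void model_populate(const struct model_domain *d, uintptr_t *l3)
{
    unsigned int i;

    for ( i = 0; i < MODEL_SLOTS; i++ )
        l3[i] = d->l2_pgs[i];  /* an empty slot copies as 0, like l3e_empty() */
}

/* Mirrors setup_perdomain_slot(): choose the table the L4 slot points at. */
static const uintptr_t *model_setup_slot(const struct model_domain *d)
{
    if ( d->asi )
    {
        /* ASI: refresh the per-CPU copy and point the slot at it. */
        model_populate(d, percpu_l3);
        return percpu_l3;
    }

    /* No ASI: every CPU shares the per-domain table. */
    return d->shared_l3;
}

int main(void)
{
    static uintptr_t shared[MODEL_L3_ENTRIES];
    struct model_domain d = {
        .l2_pgs = { 0x1000, 0, 0x3000 },  /* slot 1 left empty on purpose */
        .shared_l3 = shared,
        .asi = 1,
    };

    /* Domain creation populates the shared per-domain L3 once. */
    model_populate(&d, shared);

    /* With ASI the slot must point at the per-CPU copy... */
    assert(model_setup_slot(&d) == percpu_l3);
    /* ...and that copy must match the shared contents exactly. */
    assert(memcmp(percpu_l3, shared, sizeof(shared)) == 0);

    /* Without ASI the shared per-domain table is used directly. */
    d.asi = 0;
    assert(model_setup_slot(&d) == shared);

    puts("model ok");
    return 0;
}

Compiling this with gcc and running it should print "model ok",
mirroring the claim above that the per-CPU L3 is always a copy of the
contents of d->arch.perdomain_l3_pg.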