Message ID | 20240726152206.28411-9-roger.pau@citrix.com
State      | New
Series     | x86: adventures in Address Space Isolation
On Fri Jul 26, 2024 at 4:21 PM BST, Roger Pau Monne wrote:
> In preparation for the function being called from contexts where no domain is
> present.
>
> No functional change intended.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
>  xen/arch/x86/include/asm/mm.h  |  4 +++-
>  xen/arch/x86/mm.c              | 24 +++++++++++++-----------
>  xen/arch/x86/mm/hap/hap.c      |  3 ++-
>  xen/arch/x86/mm/shadow/hvm.c   |  3 ++-
>  xen/arch/x86/mm/shadow/multi.c |  7 +++++--
>  xen/arch/x86/pv/dom0_build.c   |  3 ++-
>  xen/arch/x86/pv/domain.c       |  3 ++-
>  7 files changed, 29 insertions(+), 18 deletions(-)
>
> diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
> index b3853ae734fa..076e7009dc99 100644
> --- a/xen/arch/x86/include/asm/mm.h
> +++ b/xen/arch/x86/include/asm/mm.h
> @@ -375,7 +375,9 @@ int devalidate_page(struct page_info *page, unsigned long type,
>
>  void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d);
>  void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
> -                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt);
> +                       mfn_t sl4mfn, const struct page_info *perdomain_l3,
> +                       bool ro_mpt, bool maybe_compat, bool short_directmap);
> +

The comment currently in the .c file should probably be here instead, and
updated for the new arguments. That said, I'm skeptical 3 booleans is
something desirable. It induces a lot of complexity at the call sites (which
of the 8 forms of init_xen_l4_slots() do I need here?) and a lot of
cognitive overload.

I can't propose a solution because I'm still wrapping my head around how the
layout (esp. compat layout) fits together. Maybe the booleans can be mapped
to an enum? It would also help interpret the callsites as it'd no longer be
a sequence of contextless booleans, but a readable identifier.

>  bool fill_ro_mpt(mfn_t mfn);
>  void zap_ro_mpt(mfn_t mfn);
>
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index a792a300a866..c01b6712143e 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -1645,14 +1645,9 @@ static int promote_l3_table(struct page_info *page)
>   * extended directmap.
>   */
>  void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
> -                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt)
> +                       mfn_t sl4mfn, const struct page_info *perdomain_l3,
> +                       bool ro_mpt, bool maybe_compat, bool short_directmap)
>  {
> -    /*
> -     * PV vcpus need a shortened directmap. HVM and Idle vcpus get the full
> -     * directmap.
> -     */
> -    bool short_directmap = !paging_mode_external(d);
> -
>      /* Slot 256: RO M2P (if applicable). */
>      l4t[l4_table_offset(RO_MPT_VIRT_START)] =
>          ro_mpt ? idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]
> @@ -1673,13 +1668,14 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
>          l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
>
>      /* Slot 260: Per-domain mappings. */
> -    l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
> -        l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
> +    if ( perdomain_l3 )
> +        l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
> +            l4e_from_page(perdomain_l3, __PAGE_HYPERVISOR_RW);
>
>      /* Slot 4: Per-domain mappings mirror. */
>      BUILD_BUG_ON(IS_ENABLED(CONFIG_PV32) &&
>                   !l4_table_offset(PERDOMAIN_ALT_VIRT_START));
> -    if ( !is_pv_64bit_domain(d) )
> +    if ( perdomain_l3 && maybe_compat )
>          l4t[l4_table_offset(PERDOMAIN_ALT_VIRT_START)] =
>              l4t[l4_table_offset(PERDOMAIN_VIRT_START)];
>
> @@ -1710,6 +1706,10 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
>      else
>  #endif
>      {
> +        /*
> +         * PV vcpus need a shortened directmap. HVM and Idle vcpus get the full
> +         * directmap.
> +         */
>          unsigned int slots = (short_directmap
>                                ? ROOT_PAGETABLE_PV_XEN_SLOTS
>                                : ROOT_PAGETABLE_XEN_SLOTS);
> @@ -1830,7 +1830,9 @@ static int promote_l4_table(struct page_info *page)
>          if ( !rc )
>          {
>              init_xen_l4_slots(pl4e, l4mfn,
> -                              d, INVALID_MFN, VM_ASSIST(d, m2p_strict));
> +                              INVALID_MFN, d->arch.perdomain_l3_pg,
> +                              VM_ASSIST(d, m2p_strict), !is_pv_64bit_domain(d),
> +                              true);
>              atomic_inc(&d->arch.pv.nr_l4_pages);
>          }
>          unmap_domain_page(pl4e);
> diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
> index d2011fde2462..c8514ca0e917 100644
> --- a/xen/arch/x86/mm/hap/hap.c
> +++ b/xen/arch/x86/mm/hap/hap.c
> @@ -402,7 +402,8 @@ static mfn_t hap_make_monitor_table(struct vcpu *v)
>      m4mfn = page_to_mfn(pg);
>      l4e = map_domain_page(m4mfn);
>
> -    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
> +    init_xen_l4_slots(l4e, m4mfn, INVALID_MFN, d->arch.perdomain_l3_pg,
> +                      false, true, false);

Out of ignorance: why is the compat area mapped on HVM monitor PTs? I
thought those were used exclusively in hypervisor context, and would hence
always have the 512 slots available.

>      unmap_domain_page(l4e);
>
>      return m4mfn;
> diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
> index c16f3b3adf32..93922a71e511 100644
> --- a/xen/arch/x86/mm/shadow/hvm.c
> +++ b/xen/arch/x86/mm/shadow/hvm.c
> @@ -758,7 +758,8 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels)
>       * shadow-linear mapping will either be inserted below when creating
>       * lower level monitor tables, or later in sh_update_cr3().
>       */
> -    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
> +    init_xen_l4_slots(l4e, m4mfn, INVALID_MFN, d->arch.perdomain_l3_pg,
> +                      false, true, false);
>
>      if ( shadow_levels < 4 )
>      {
> diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
> index 376f6823cd44..0def0c073ca8 100644
> --- a/xen/arch/x86/mm/shadow/multi.c
> +++ b/xen/arch/x86/mm/shadow/multi.c
> @@ -973,8 +973,11 @@ sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
>
>              BUILD_BUG_ON(sizeof(l4_pgentry_t) != sizeof(shadow_l4e_t));
>
> -            init_xen_l4_slots(l4t, gmfn, d, smfn, (!is_pv_32bit_domain(d) &&
> -                                                   VM_ASSIST(d, m2p_strict)));
> +            init_xen_l4_slots(l4t, gmfn, smfn,
> +                              d->arch.perdomain_l3_pg,
> +                              (!is_pv_32bit_domain(d) &&
> +                               VM_ASSIST(d, m2p_strict)),
> +                              !is_pv_64bit_domain(d), true);
>              unmap_domain_page(l4t);
>          }
>          break;
> diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
> index 41772dbe80bf..6a6689f402bb 100644
> --- a/xen/arch/x86/pv/dom0_build.c
> +++ b/xen/arch/x86/pv/dom0_build.c
> @@ -711,7 +711,8 @@ int __init dom0_construct_pv(struct domain *d,
>          l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
>          clear_page(l4tab);
>          init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)),
> -                          d, INVALID_MFN, true);
> +                          INVALID_MFN, d->arch.perdomain_l3_pg,
> +                          true, !is_pv_64bit_domain(d), true);
>          v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
>      }
>      else
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 86b74fb372d5..6ff71f14a2f2 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -124,7 +124,8 @@ static int setup_compat_l4(struct vcpu *v)
>      mfn = page_to_mfn(pg);
>      l4tab = map_domain_page(mfn);
>      clear_page(l4tab);
> -    init_xen_l4_slots(l4tab, mfn, v->domain, INVALID_MFN, false);
> +    init_xen_l4_slots(l4tab, mfn, INVALID_MFN, v->domain->arch.perdomain_l3_pg,
> +                      false, true, true);
>      unmap_domain_page(l4tab);
>
>      /* This page needs to look like a pagetable so that it can be shadowed */
On 29.07.2024 15:36, Alejandro Vallejo wrote:
> On Fri Jul 26, 2024 at 4:21 PM BST, Roger Pau Monne wrote:
>> --- a/xen/arch/x86/mm/hap/hap.c
>> +++ b/xen/arch/x86/mm/hap/hap.c
>> @@ -402,7 +402,8 @@ static mfn_t hap_make_monitor_table(struct vcpu *v)
>>      m4mfn = page_to_mfn(pg);
>>      l4e = map_domain_page(m4mfn);
>>
>> -    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
>> +    init_xen_l4_slots(l4e, m4mfn, INVALID_MFN, d->arch.perdomain_l3_pg,
>> +                      false, true, false);
>
> Out of ignorance: why is the compat area mapped on HVM monitor PTs? I
> thought those were used exclusively in hypervisor context, and would hence
> always have the 512 slots available.

"compat area" is perhaps a misleading term. If you look at the function
itself, it's PERDOMAIN_ALT_VIRT_START that is mapped in that case. Which
underlies the compat-arg-xlat area, which both 32-bit PV and all HVM guests
need, the latter as they can, at any time, switch to 32-bit mode.

Jan
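In other words, the guests that need the mirror slot are exactly those for
which the !is_pv_64bit_domain(d) check the patch replaces holds. A minimal
sketch of that equivalence for guest domains (the helper name is
hypothetical, not something in the Xen tree):

    /*
     * Hypothetical helper, equivalent to !is_pv_64bit_domain(d) for guest
     * domains: true for 32-bit PV and for all HVM guests, i.e. exactly the
     * guests that need the compat-arg-xlat area mapped.
     */
    static inline bool needs_compat_xlat(const struct domain *d)
    {
        return is_pv_32bit_domain(d) || is_hvm_domain(d);
    }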
On Mon, Jul 29, 2024 at 02:36:39PM +0100, Alejandro Vallejo wrote:
> On Fri Jul 26, 2024 at 4:21 PM BST, Roger Pau Monne wrote:
> > In preparation for the function being called from contexts where no domain is
> > present.
> >
> > No functional change intended.
> >
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> >  xen/arch/x86/include/asm/mm.h  |  4 +++-
> >  xen/arch/x86/mm.c              | 24 +++++++++++++-----------
> >  xen/arch/x86/mm/hap/hap.c      |  3 ++-
> >  xen/arch/x86/mm/shadow/hvm.c   |  3 ++-
> >  xen/arch/x86/mm/shadow/multi.c |  7 +++++--
> >  xen/arch/x86/pv/dom0_build.c   |  3 ++-
> >  xen/arch/x86/pv/domain.c       |  3 ++-
> >  7 files changed, 29 insertions(+), 18 deletions(-)
> >
> > diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
> > index b3853ae734fa..076e7009dc99 100644
> > --- a/xen/arch/x86/include/asm/mm.h
> > +++ b/xen/arch/x86/include/asm/mm.h
> > @@ -375,7 +375,9 @@ int devalidate_page(struct page_info *page, unsigned long type,
> >
> >  void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d);
> >  void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
> > -                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt);
> > +                       mfn_t sl4mfn, const struct page_info *perdomain_l3,
> > +                       bool ro_mpt, bool maybe_compat, bool short_directmap);
> > +
>
> The comment currently in the .c file should probably be here instead, and
> updated for the new arguments. That said, I'm skeptical 3 booleans is
> something desirable. It induces a lot of complexity at the call sites (which
> of the 8 forms of init_xen_l4_slots() do I need here?) and a lot of
> cognitive overload.
>
> I can't propose a solution because I'm still wrapping my head around how the
> layout (esp. compat layout) fits together. Maybe the booleans can be mapped
> to an enum? It would also help interpret the callsites as it'd no longer be
> a sequence of contextless booleans, but a readable identifier.

We have the following possible combinations:

        RO MPT    COMPAT XLAT    SHORT DMAP
PV64    ?         N              Y
PV32    N         Y              Y
HVM     N         Y              N

So we would need:

enum l4_domain_type {
    PV64,
    PV64_RO_MPT,
    PV32,
    HVM
};

I can see about replacing those last 3 booleans with the proposed enum.

Thanks, Roger.
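For illustration, such an enum could decode onto the three trailing booleans
of init_xen_l4_slots() along these lines (a sketch only, following the table
above; the helper name is made up here, not part of any posted patch):

    enum l4_domain_type { PV64, PV64_RO_MPT, PV32, HVM };

    /*
     * Illustrative decode of the proposed enum onto the three trailing
     * booleans.  The two PV64 variants cover the '?' in the table, where
     * ro_mpt depends on VM_ASSIST(d, m2p_strict).
     */
    static void l4_type_decode(enum l4_domain_type type, bool *ro_mpt,
                               bool *compat_xlat, bool *short_directmap)
    {
        *ro_mpt          = (type == PV64_RO_MPT);
        *compat_xlat     = (type == PV32 || type == HVM);
        *short_directmap = (type != HVM);
    }

Call sites would then pass a single readable identifier (e.g. HVM for the
monitor tables) instead of a sequence of contextless booleans.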
On 26.07.2024 17:21, Roger Pau Monne wrote:
> @@ -1673,13 +1668,14 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
>          l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
>
>      /* Slot 260: Per-domain mappings. */
> -    l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
> -        l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
> +    if ( perdomain_l3 )
> +        l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
> +            l4e_from_page(perdomain_l3, __PAGE_HYPERVISOR_RW);
>
>      /* Slot 4: Per-domain mappings mirror. */
>      BUILD_BUG_ON(IS_ENABLED(CONFIG_PV32) &&
>                   !l4_table_offset(PERDOMAIN_ALT_VIRT_START));
> -    if ( !is_pv_64bit_domain(d) )
> +    if ( perdomain_l3 && maybe_compat )
>          l4t[l4_table_offset(PERDOMAIN_ALT_VIRT_START)] =
>              l4t[l4_table_offset(PERDOMAIN_VIRT_START)];

I think it would be nice if the description could clarify why we need checks
of perdomain_l3 twice here. I assume going forward you want to be able to
pass in NULL. Therefore, if the conditionals are required, I think it would
make sense to have just one, enclosing both (related) writes.

Jan
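Expressed as code, the suggested single conditional might look like this (a
sketch against the patched function, not a posted change):

    /*
     * Slot 260 and its slot 4 mirror, only populated when a per-domain L3
     * is supplied (going forward, callers may pass NULL).
     */
    if ( perdomain_l3 )
    {
        l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
            l4e_from_page(perdomain_l3, __PAGE_HYPERVISOR_RW);

        if ( maybe_compat )
            l4t[l4_table_offset(PERDOMAIN_ALT_VIRT_START)] =
                l4t[l4_table_offset(PERDOMAIN_VIRT_START)];
    }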
In preparation for the function being called from contexts where no domain is
present.

No functional change intended.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/include/asm/mm.h  |  4 +++-
 xen/arch/x86/mm.c              | 24 +++++++++++++-----------
 xen/arch/x86/mm/hap/hap.c      |  3 ++-
 xen/arch/x86/mm/shadow/hvm.c   |  3 ++-
 xen/arch/x86/mm/shadow/multi.c |  7 +++++--
 xen/arch/x86/pv/dom0_build.c   |  3 ++-
 xen/arch/x86/pv/domain.c       |  3 ++-
 7 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h
index b3853ae734fa..076e7009dc99 100644
--- a/xen/arch/x86/include/asm/mm.h
+++ b/xen/arch/x86/include/asm/mm.h
@@ -375,7 +375,9 @@ int devalidate_page(struct page_info *page, unsigned long type,
 
 void init_xen_pae_l2_slots(l2_pgentry_t *l2t, const struct domain *d);
 void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
-                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt);
+                       mfn_t sl4mfn, const struct page_info *perdomain_l3,
+                       bool ro_mpt, bool maybe_compat, bool short_directmap);
+
 bool fill_ro_mpt(mfn_t mfn);
 void zap_ro_mpt(mfn_t mfn);
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index a792a300a866..c01b6712143e 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1645,14 +1645,9 @@ static int promote_l3_table(struct page_info *page)
  * extended directmap.
  */
 void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
-                       const struct domain *d, mfn_t sl4mfn, bool ro_mpt)
+                       mfn_t sl4mfn, const struct page_info *perdomain_l3,
+                       bool ro_mpt, bool maybe_compat, bool short_directmap)
 {
-    /*
-     * PV vcpus need a shortened directmap. HVM and Idle vcpus get the full
-     * directmap.
-     */
-    bool short_directmap = !paging_mode_external(d);
-
     /* Slot 256: RO M2P (if applicable). */
     l4t[l4_table_offset(RO_MPT_VIRT_START)] =
         ro_mpt ? idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]
@@ -1673,13 +1668,14 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
         l4e_from_mfn(sl4mfn, __PAGE_HYPERVISOR_RW);
 
     /* Slot 260: Per-domain mappings. */
-    l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_page(d->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
+    if ( perdomain_l3 )
+        l4t[l4_table_offset(PERDOMAIN_VIRT_START)] =
+            l4e_from_page(perdomain_l3, __PAGE_HYPERVISOR_RW);
 
     /* Slot 4: Per-domain mappings mirror. */
     BUILD_BUG_ON(IS_ENABLED(CONFIG_PV32) &&
                  !l4_table_offset(PERDOMAIN_ALT_VIRT_START));
-    if ( !is_pv_64bit_domain(d) )
+    if ( perdomain_l3 && maybe_compat )
         l4t[l4_table_offset(PERDOMAIN_ALT_VIRT_START)] =
             l4t[l4_table_offset(PERDOMAIN_VIRT_START)];
 
@@ -1710,6 +1706,10 @@ void init_xen_l4_slots(l4_pgentry_t *l4t, mfn_t l4mfn,
     else
 #endif
     {
+        /*
+         * PV vcpus need a shortened directmap. HVM and Idle vcpus get the full
+         * directmap.
+         */
         unsigned int slots = (short_directmap
                               ? ROOT_PAGETABLE_PV_XEN_SLOTS
                               : ROOT_PAGETABLE_XEN_SLOTS);
@@ -1830,7 +1830,9 @@ static int promote_l4_table(struct page_info *page)
         if ( !rc )
         {
             init_xen_l4_slots(pl4e, l4mfn,
-                              d, INVALID_MFN, VM_ASSIST(d, m2p_strict));
+                              INVALID_MFN, d->arch.perdomain_l3_pg,
+                              VM_ASSIST(d, m2p_strict), !is_pv_64bit_domain(d),
+                              true);
             atomic_inc(&d->arch.pv.nr_l4_pages);
         }
         unmap_domain_page(pl4e);
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d2011fde2462..c8514ca0e917 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -402,7 +402,8 @@ static mfn_t hap_make_monitor_table(struct vcpu *v)
     m4mfn = page_to_mfn(pg);
     l4e = map_domain_page(m4mfn);
 
-    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
+    init_xen_l4_slots(l4e, m4mfn, INVALID_MFN, d->arch.perdomain_l3_pg,
+                      false, true, false);
     unmap_domain_page(l4e);
 
     return m4mfn;
diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
index c16f3b3adf32..93922a71e511 100644
--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -758,7 +758,8 @@ mfn_t sh_make_monitor_table(const struct vcpu *v, unsigned int shadow_levels)
      * shadow-linear mapping will either be inserted below when creating
      * lower level monitor tables, or later in sh_update_cr3().
      */
-    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
+    init_xen_l4_slots(l4e, m4mfn, INVALID_MFN, d->arch.perdomain_l3_pg,
+                      false, true, false);
 
     if ( shadow_levels < 4 )
     {
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 376f6823cd44..0def0c073ca8 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -973,8 +973,11 @@ sh_make_shadow(struct vcpu *v, mfn_t gmfn, u32 shadow_type)
 
             BUILD_BUG_ON(sizeof(l4_pgentry_t) != sizeof(shadow_l4e_t));
 
-            init_xen_l4_slots(l4t, gmfn, d, smfn, (!is_pv_32bit_domain(d) &&
-                                                   VM_ASSIST(d, m2p_strict)));
+            init_xen_l4_slots(l4t, gmfn, smfn,
+                              d->arch.perdomain_l3_pg,
+                              (!is_pv_32bit_domain(d) &&
+                               VM_ASSIST(d, m2p_strict)),
+                              !is_pv_64bit_domain(d), true);
             unmap_domain_page(l4t);
         }
         break;
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 41772dbe80bf..6a6689f402bb 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -711,7 +711,8 @@ int __init dom0_construct_pv(struct domain *d,
         l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
         clear_page(l4tab);
         init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)),
-                          d, INVALID_MFN, true);
+                          INVALID_MFN, d->arch.perdomain_l3_pg,
+                          true, !is_pv_64bit_domain(d), true);
         v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     }
     else
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 86b74fb372d5..6ff71f14a2f2 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -124,7 +124,8 @@ static int setup_compat_l4(struct vcpu *v)
     mfn = page_to_mfn(pg);
     l4tab = map_domain_page(mfn);
     clear_page(l4tab);
-    init_xen_l4_slots(l4tab, mfn, v->domain, INVALID_MFN, false);
+    init_xen_l4_slots(l4tab, mfn, INVALID_MFN, v->domain->arch.perdomain_l3_pg,
+                      false, true, true);
     unmap_domain_page(l4tab);
 
     /* This page needs to look like a pagetable so that it can be shadowed */