Message ID | 20240116192611.41112-10-eliasely@amazon.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Remove the directmap | expand |
On 16.01.2024 20:25, Elias El Yandouzi wrote: > --- a/xen/arch/x86/pv/dom0_build.c > +++ b/xen/arch/x86/pv/dom0_build.c > @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d, > l3_pgentry_t *l3tab = NULL, *l3start = NULL; > l2_pgentry_t *l2tab = NULL, *l2start = NULL; > l1_pgentry_t *l1tab = NULL, *l1start = NULL; > + mfn_t l4start_mfn = INVALID_MFN; > + mfn_t l3start_mfn = INVALID_MFN; > + mfn_t l2start_mfn = INVALID_MFN; > + mfn_t l1start_mfn = INVALID_MFN; The reason initializers are needed here is, aiui, the overly large scope of these variables. For example ... > @@ -708,22 +712,32 @@ int __init dom0_construct_pv(struct domain *d, > v->arch.pv.event_callback_cs = FLAT_COMPAT_KERNEL_CS; > } > > +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \ > +do { \ > + unmap_domain_page(virt_var); \ > + mfn_var = maddr_to_mfn(maddr); \ > + maddr += PAGE_SIZE; \ > + virt_var = map_domain_page(mfn_var); \ > +} while ( false ) > + > if ( !compat ) > { > maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; > - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; > + UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc); > + l4tab = l4start; > clear_page(l4tab); > - init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)), > - d, INVALID_MFN, true); > - v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); > + init_xen_l4_slots(l4tab, l4start_mfn, d, INVALID_MFN, true); > + v->arch.guest_table = pagetable_from_mfn(l4start_mfn); ... looks to be required only here, while ... > } > else > { > /* Monitor table already created by switch_compat(). */ > - l4start = l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); > + l4start_mfn = pagetable_get_mfn(v->arch.guest_table); > + l4start = l4tab = map_domain_page(l4start_mfn); ... in principle the use of the variable could be avoided here. Below from here there's no further use of it. 
> @@ -781,30 +797,34 @@ int __init dom0_construct_pv(struct domain *d, > > if ( compat ) > { > - l2_pgentry_t *l2t; > - > /* Ensure the first four L3 entries are all populated. */ > for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) > { > if ( !l3e_get_intpte(*l3tab) ) > { > maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; > - l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; > - clear_page(l2tab); > - *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); > + UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc); > + clear_page(l2start); > + *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT); > } The updating of l2start is only conditional here, yet ... > if ( i == 3 ) > l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; > } > > - l2t = map_l2t_from_l3e(l3start[3]); > - init_xen_pae_l2_slots(l2t, d); > - unmap_domain_page(l2t); > + init_xen_pae_l2_slots(l2start, d); ... here you assume it points at the page referenced by the 3rd L3 entry. Question is why the original code is being replaced here in the first place: It was already suitably mapping the page in question. Jan
> On 20/02/2024 10:28, Jan Beulich wrote: >> On 16.01.2024 20:25, Elias El Yandouzi wrote: >> --- a/xen/arch/x86/pv/dom0_build.c >> +++ b/xen/arch/x86/pv/dom0_build.c >> @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d, >> l3_pgentry_t *l3tab = NULL, *l3start = NULL; >> l2_pgentry_t *l2tab = NULL, *l2start = NULL; >> l1_pgentry_t *l1tab = NULL, *l1start = NULL; >> + mfn_t l4start_mfn = INVALID_MFN; >> + mfn_t l3start_mfn = INVALID_MFN; >> + mfn_t l2start_mfn = INVALID_MFN; >> + mfn_t l1start_mfn = INVALID_MFN; > > The reason initializers are needed here is, aiui, the overly large scope > of these variables. For example ... > Correct, is it just an observation or do you want me to do anything? >> @@ -708,22 +712,32 @@ int __init dom0_construct_pv(struct domain *d, >> v->arch.pv.event_callback_cs = FLAT_COMPAT_KERNEL_CS; >> } >> >> +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \ >> +do { \ >> + unmap_domain_page(virt_var); \ >> + mfn_var = maddr_to_mfn(maddr); \ >> + maddr += PAGE_SIZE; \ >> + virt_var = map_domain_page(mfn_var); \ >> +} while ( false ) >> + >> if ( !compat ) >> { >> maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; >> - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; >> + UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc); >> + l4tab = l4start; >> clear_page(l4tab); >> - init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)), >> - d, INVALID_MFN, true); >> - v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); >> + init_xen_l4_slots(l4tab, l4start_mfn, d, INVALID_MFN, true); >> + v->arch.guest_table = pagetable_from_mfn(l4start_mfn); > > ... looks to be required only here, while ... > >> } >> else >> { >> /* Monitor table already created by switch_compat(). */ >> - l4start = l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); >> + l4start_mfn = pagetable_get_mfn(v->arch.guest_table); >> + l4start = l4tab = map_domain_page(l4start_mfn); > > ... 
in principle the use of the variable could be avoided here. Below > from here there's no further use of it. > >> @@ -781,30 +797,34 @@ int __init dom0_construct_pv(struct domain *d, >> >> if ( compat ) >> { >> - l2_pgentry_t *l2t; >> - >> /* Ensure the first four L3 entries are all populated. */ >> for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) >> { >> if ( !l3e_get_intpte(*l3tab) ) >> { >> maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; >> - l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; >> - clear_page(l2tab); >> - *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); >> + UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc); >> + clear_page(l2start); >> + *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT); >> } > > The updating of l2start is only conditional here, yet ... > >> if ( i == 3 ) >> l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; >> } >> >> - l2t = map_l2t_from_l3e(l3start[3]); >> - init_xen_pae_l2_slots(l2t, d); >> - unmap_domain_page(l2t); >> + init_xen_pae_l2_slots(l2start, d); > > ... here you assume it points at the page referenced by the 3rd L3 entry. Hmm, I missed it when sending the revision and indeed it doesn't look correct. > Question is why the original code is being replaced here in the first > place: It was already suitably mapping the page in question. The code was already suitably mapping the pages in question. This patch doesn't aim to make any functional change, just to rework how the domheap pages are used. The goal of the series is to remove the mappings from the directmap, which means those pages need to be mapped and unmapped when required. This is all this patch does, see `UNMAP_MAP_AND_ADVANCE()` macro. Elias
On 07.05.2024 17:21, Elias El Yandouzi wrote: > > On 20/02/2024 10:28, Jan Beulich wrote: >>> On 16.01.2024 20:25, Elias El Yandouzi wrote: >>> --- a/xen/arch/x86/pv/dom0_build.c >>> +++ b/xen/arch/x86/pv/dom0_build.c >>> @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d, >>> l3_pgentry_t *l3tab = NULL, *l3start = NULL; >>> l2_pgentry_t *l2tab = NULL, *l2start = NULL; >>> l1_pgentry_t *l1tab = NULL, *l1start = NULL; >>> + mfn_t l4start_mfn = INVALID_MFN; >>> + mfn_t l3start_mfn = INVALID_MFN; >>> + mfn_t l2start_mfn = INVALID_MFN; >>> + mfn_t l1start_mfn = INVALID_MFN; >> >> The reason initializers are needed here is, aiui, the overly large scope >> of these variables. For example ... > > Correct, is it just an observation or do you want me to do anything? Where possible reducing the scope of variables would be preferred. Hence why I ... >>> @@ -708,22 +712,32 @@ int __init dom0_construct_pv(struct domain *d, >>> v->arch.pv.event_callback_cs = FLAT_COMPAT_KERNEL_CS; >>> } >>> >>> +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \ >>> +do { \ >>> + unmap_domain_page(virt_var); \ >>> + mfn_var = maddr_to_mfn(maddr); \ >>> + maddr += PAGE_SIZE; \ >>> + virt_var = map_domain_page(mfn_var); \ >>> +} while ( false ) >>> + >>> if ( !compat ) >>> { >>> maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; >>> - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; >>> + UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc); >>> + l4tab = l4start; >>> clear_page(l4tab); >>> - init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)), >>> - d, INVALID_MFN, true); >>> - v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); >>> + init_xen_l4_slots(l4tab, l4start_mfn, d, INVALID_MFN, true); >>> + v->arch.guest_table = pagetable_from_mfn(l4start_mfn); >> >> ... looks to be required only here, while ... >> >>> } >>> else >>> { >>> /* Monitor table already created by switch_compat(). 
*/ >>> - l4start = l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); >>> + l4start_mfn = pagetable_get_mfn(v->arch.guest_table); >>> + l4start = l4tab = map_domain_page(l4start_mfn); >> >> ... in principle the use of the variable could be avoided here. Below >> from here there's no further use of it. ... went into some detail towards that possibility. >>> @@ -781,30 +797,34 @@ int __init dom0_construct_pv(struct domain *d, >>> >>> if ( compat ) >>> { >>> - l2_pgentry_t *l2t; >>> - >>> /* Ensure the first four L3 entries are all populated. */ >>> for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) >>> { >>> if ( !l3e_get_intpte(*l3tab) ) >>> { >>> maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; >>> - l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; >>> - clear_page(l2tab); >>> - *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); >>> + UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc); >>> + clear_page(l2start); >>> + *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT); >>> } >> >> The updating of l2start is only conditional here, yet ... >> >>> if ( i == 3 ) >>> l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; >>> } >>> >>> - l2t = map_l2t_from_l3e(l3start[3]); >>> - init_xen_pae_l2_slots(l2t, d); >>> - unmap_domain_page(l2t); >>> + init_xen_pae_l2_slots(l2start, d); >> >> ... here you assume it points at the page referenced by the 3rd L3 entry. > > Hmm, I missed it when sending the revision and indeed it doesn't look > correct. > >> Question is why the original code is being replaced here in the first >> place: It was already suitably mapping the page in question. > > The code was already suitably mapping the pages in question. This patch > doesn't aim to make any functional change, just to rework how the > domheap pages are used. The goal of the series is to remove the mappings > from the directmap, which means those pages needs to be mapped and > unmapped when required. 
But that doesn't address my question: If there's nothing wrong with the earlier code, why does it need changing (right here)? Jan
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c index 5659814e0c..dc5e9fe117 100644 --- a/xen/arch/x86/pv/dom0_build.c +++ b/xen/arch/x86/pv/dom0_build.c @@ -382,6 +382,10 @@ int __init dom0_construct_pv(struct domain *d, l3_pgentry_t *l3tab = NULL, *l3start = NULL; l2_pgentry_t *l2tab = NULL, *l2start = NULL; l1_pgentry_t *l1tab = NULL, *l1start = NULL; + mfn_t l4start_mfn = INVALID_MFN; + mfn_t l3start_mfn = INVALID_MFN; + mfn_t l2start_mfn = INVALID_MFN; + mfn_t l1start_mfn = INVALID_MFN; /* * This fully describes the memory layout of the initial domain. All @@ -708,22 +712,32 @@ int __init dom0_construct_pv(struct domain *d, v->arch.pv.event_callback_cs = FLAT_COMPAT_KERNEL_CS; } +#define UNMAP_MAP_AND_ADVANCE(mfn_var, virt_var, maddr) \ +do { \ + unmap_domain_page(virt_var); \ + mfn_var = maddr_to_mfn(maddr); \ + maddr += PAGE_SIZE; \ + virt_var = map_domain_page(mfn_var); \ +} while ( false ) + if ( !compat ) { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table; - l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + UNMAP_MAP_AND_ADVANCE(l4start_mfn, l4start, mpt_alloc); + l4tab = l4start; clear_page(l4tab); - init_xen_l4_slots(l4tab, _mfn(virt_to_mfn(l4start)), - d, INVALID_MFN, true); - v->arch.guest_table = pagetable_from_paddr(__pa(l4start)); + init_xen_l4_slots(l4tab, l4start_mfn, d, INVALID_MFN, true); + v->arch.guest_table = pagetable_from_mfn(l4start_mfn); } else { /* Monitor table already created by switch_compat(). */ - l4start = l4tab = __va(pagetable_get_paddr(v->arch.guest_table)); + l4start_mfn = pagetable_get_mfn(v->arch.guest_table); + l4start = l4tab = map_domain_page(l4start_mfn); /* See public/xen.h on why the following is needed. 
*/ maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table; l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + UNMAP_MAP_AND_ADVANCE(l3start_mfn, l3start, mpt_alloc); } l4tab += l4_table_offset(v_start); @@ -733,14 +747,16 @@ int __init dom0_construct_pv(struct domain *d, if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) ) { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l1_page_table; - l1start = l1tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + UNMAP_MAP_AND_ADVANCE(l1start_mfn, l1start, mpt_alloc); + l1tab = l1start; clear_page(l1tab); if ( count == 0 ) l1tab += l1_table_offset(v_start); if ( !((unsigned long)l2tab & (PAGE_SIZE-1)) ) { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; - l2start = l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc); + l2tab = l2start; clear_page(l2tab); if ( count == 0 ) l2tab += l2_table_offset(v_start); @@ -750,19 +766,19 @@ int __init dom0_construct_pv(struct domain *d, { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l3_page_table; - l3start = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; + UNMAP_MAP_AND_ADVANCE(l3start_mfn, l3start, mpt_alloc); } l3tab = l3start; clear_page(l3tab); if ( count == 0 ) l3tab += l3_table_offset(v_start); - *l4tab = l4e_from_paddr(__pa(l3start), L4_PROT); + *l4tab = l4e_from_mfn(l3start_mfn, L4_PROT); l4tab++; } - *l3tab = l3e_from_paddr(__pa(l2start), L3_PROT); + *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT); l3tab++; } - *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT); + *l2tab = l2e_from_mfn(l1start_mfn, L2_PROT); l2tab++; } if ( count < initrd_pfn || count >= initrd_pfn + PFN_UP(initrd_len) ) @@ -781,30 +797,34 @@ int __init dom0_construct_pv(struct domain *d, if ( compat ) { - l2_pgentry_t *l2t; - /* Ensure the first four L3 entries are all populated. 
*/ for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab ) { if ( !l3e_get_intpte(*l3tab) ) { maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table; - l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE; - clear_page(l2tab); - *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT); + UNMAP_MAP_AND_ADVANCE(l2start_mfn, l2start, mpt_alloc); + clear_page(l2start); + *l3tab = l3e_from_mfn(l2start_mfn, L3_PROT); } if ( i == 3 ) l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2; } - l2t = map_l2t_from_l3e(l3start[3]); - init_xen_pae_l2_slots(l2t, d); - unmap_domain_page(l2t); + init_xen_pae_l2_slots(l2start, d); } +#undef UNMAP_MAP_AND_ADVANCE + + UNMAP_DOMAIN_PAGE(l1start); + UNMAP_DOMAIN_PAGE(l2start); + UNMAP_DOMAIN_PAGE(l3start); + /* Pages that are part of page tables must be read only. */ mark_pv_pt_pages_rdonly(d, l4start, vpt_start, nr_pt_pages, &flush_flags); + UNMAP_DOMAIN_PAGE(l4start); + /* Mask all upcalls... */ for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1;