Message ID | 20221216114853.8227-3-julien@xen.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Remove the directmap | expand |
On 16.12.2022 12:48, Julien Grall wrote: > From: Wei Liu <wei.liu2@citrix.com> > > After the direct map removal, pages from the boot allocator are not > mapped at all in the direct map. Although we have map_domain_page, they Nit: "will not be mapped" or "are not going to be mapped", or else this sounds like there's a bug somewhere. > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -870,6 +870,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) > unsigned long eb_start, eb_end; > bool acpi_boot_table_init_done = false, relocated = false; > int ret; > + bool vm_init_done = false; Can this please be grouped with the other bool-s (even visible in context)? > --- a/xen/common/vmap.c > +++ b/xen/common/vmap.c > @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) > > for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) > { > - struct page_info *pg = alloc_domheap_page(NULL, 0); > + mfn_t mfn; > + int rc; > > - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); > + if ( system_state == SYS_STATE_early_boot ) > + mfn = alloc_boot_pages(1, 1); > + else > + { > + struct page_info *pg = alloc_domheap_page(NULL, 0); > + > + BUG_ON(!pg); > + mfn = page_to_mfn(pg); > + } > + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); > + BUG_ON(rc); The adding of a return value check is unrelated and not overly useful: > clear_page((void *)va); This will fault anyway if the mapping attempt failed. Jan
Hi Jan, On 20/12/2022 15:08, Jan Beulich wrote: > On 16.12.2022 12:48, Julien Grall wrote: >> From: Wei Liu <wei.liu2@citrix.com> >> >> After the direct map removal, pages from the boot allocator are not >> mapped at all in the direct map. Although we have map_domain_page, they > > Nit: "will not be mapped" or "are not going to be mapped", or else this > sounds like there's a bug somewhere. I will update. > >> --- a/xen/arch/x86/setup.c >> +++ b/xen/arch/x86/setup.c >> @@ -870,6 +870,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) >> unsigned long eb_start, eb_end; >> bool acpi_boot_table_init_done = false, relocated = false; >> int ret; >> + bool vm_init_done = false; > > Can this please be grouped with the other bool-s (even visible in context)? >> --- a/xen/common/vmap.c >> +++ b/xen/common/vmap.c >> @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) >> >> for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) >> { >> - struct page_info *pg = alloc_domheap_page(NULL, 0); >> + mfn_t mfn; >> + int rc; >> >> - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); >> + if ( system_state == SYS_STATE_early_boot ) >> + mfn = alloc_boot_pages(1, 1); >> + else >> + { >> + struct page_info *pg = alloc_domheap_page(NULL, 0); >> + >> + BUG_ON(!pg); >> + mfn = page_to_mfn(pg); >> + } >> + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); >> + BUG_ON(rc); > > The adding of a return value check is unrelated and not overly useful: > >> clear_page((void *)va); > > This will fault anyway if the mapping attempt failed. Not always. At least on Arm, map_pages_to_xen() could fail if the VA was mapped to another physical address. This seems unlikely, yet I think that relying on clear_page() to always fail when map_pages_to_xen() return an error is bogus. Cheers,
On 21.12.2022 11:18, Julien Grall wrote: > On 20/12/2022 15:08, Jan Beulich wrote: >> On 16.12.2022 12:48, Julien Grall wrote: >>> --- a/xen/common/vmap.c >>> +++ b/xen/common/vmap.c >>> @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) >>> >>> for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) >>> { >>> - struct page_info *pg = alloc_domheap_page(NULL, 0); >>> + mfn_t mfn; >>> + int rc; >>> >>> - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); >>> + if ( system_state == SYS_STATE_early_boot ) >>> + mfn = alloc_boot_pages(1, 1); >>> + else >>> + { >>> + struct page_info *pg = alloc_domheap_page(NULL, 0); >>> + >>> + BUG_ON(!pg); >>> + mfn = page_to_mfn(pg); >>> + } >>> + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); >>> + BUG_ON(rc); >> >> The adding of a return value check is unrelated and not overly useful: >> >>> clear_page((void *)va); >> >> This will fault anyway if the mapping attempt failed. > > Not always. At least on Arm, map_pages_to_xen() could fail if the VA was > mapped to another physical address. Oh, okay. > This seems unlikely, yet I think that relying on clear_page() to always > fail when map_pages_to_xen() return an error is bogus. Fair enough, but then please at least call out the change (and the reason) in the description. Even better might be to make this a separate change, but I wouldn't insist (quite likely I wouldn't have made this a separate change either). Jan
Hi Jan, On 20/12/2022 15:08, Jan Beulich wrote: > On 16.12.2022 12:48, Julien Grall wrote: >> From: Wei Liu <wei.liu2@citrix.com> >> >> After the direct map removal, pages from the boot allocator are not >> mapped at all in the direct map. Although we have map_domain_page, they > > Nit: "will not be mapped" or "are not going to be mapped", or else this > sounds like there's a bug somewhere. > >> --- a/xen/arch/x86/setup.c >> +++ b/xen/arch/x86/setup.c >> @@ -870,6 +870,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) >> unsigned long eb_start, eb_end; >> bool acpi_boot_table_init_done = false, relocated = false; >> int ret; >> + bool vm_init_done = false; > > Can this please be grouped with the other bool-s (even visible in context)? This can't fit on the same line. So I went with: bool acpi_boot_table_init_done = false, relocated = false; bool vm_init_done = false; I prefer this over the below: bool acpi_boot_table_init_done = false, relocated false, vm_init_done = false; Cheers,
Hi Jan, On 21/12/2022 10:22, Jan Beulich wrote: > On 21.12.2022 11:18, Julien Grall wrote: >> On 20/12/2022 15:08, Jan Beulich wrote: >>> On 16.12.2022 12:48, Julien Grall wrote: >>>> --- a/xen/common/vmap.c >>>> +++ b/xen/common/vmap.c >>>> @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) >>>> >>>> for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) >>>> { >>>> - struct page_info *pg = alloc_domheap_page(NULL, 0); >>>> + mfn_t mfn; >>>> + int rc; >>>> >>>> - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); >>>> + if ( system_state == SYS_STATE_early_boot ) >>>> + mfn = alloc_boot_pages(1, 1); >>>> + else >>>> + { >>>> + struct page_info *pg = alloc_domheap_page(NULL, 0); >>>> + >>>> + BUG_ON(!pg); >>>> + mfn = page_to_mfn(pg); >>>> + } >>>> + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); >>>> + BUG_ON(rc); >>> >>> The adding of a return value check is unrelated and not overly useful: >>> >>>> clear_page((void *)va); >>> >>> This will fault anyway if the mapping attempt failed. >> >> Not always. At least on Arm, map_pages_to_xen() could fail if the VA was >> mapped to another physical address. > > Oh, okay. > >> This seems unlikely, yet I think that relying on clear_page() to always >> fail when map_pages_to_xen() return an error is bogus. > > Fair enough, but then please at least call out the change (and the reason) > in the description. Even better might be to make this a separate change, > but I wouldn't insist (quite likely I wouldn't have made this a separate > change either). I have moved the change in a separate patch. Cheers, > > Jan
On Fri, 16 Dec 2022, Julien Grall wrote: > From: Wei Liu <wei.liu2@citrix.com> > > After the direct map removal, pages from the boot allocator are not > mapped at all in the direct map. Although we have map_domain_page, they > are ephemeral and are less helpful for mappings that are more than a > page, so we want a mechanism to globally map a range of pages, which is > what vmap is for. Therefore, we bring vm_init into early boot stage. > > To allow vmap to be initialised and used in early boot, we need to > modify vmap to receive pages from the boot allocator during early boot > stage. > > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > Signed-off-by: David Woodhouse <dwmw2@amazon.com> > Signed-off-by: Hongyan Xia <hongyxia@amazon.com> > Signed-off-by: Julien Grall <jgrall@amazon.com> For the arm and common parts: Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > --- > xen/arch/arm/setup.c | 4 ++-- > xen/arch/x86/setup.c | 31 ++++++++++++++++++++----------- > xen/common/vmap.c | 37 +++++++++++++++++++++++++++++-------- > 3 files changed, 51 insertions(+), 21 deletions(-) > > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c > index 1f26f67b90e3..2311726f5ddd 100644 > --- a/xen/arch/arm/setup.c > +++ b/xen/arch/arm/setup.c > @@ -1028,6 +1028,8 @@ void __init start_xen(unsigned long boot_phys_offset, > > setup_mm(); > > + vm_init(); > + > /* Parse the ACPI tables for possible boot-time configuration */ > acpi_boot_table_init(); > > @@ -1039,8 +1041,6 @@ void __init start_xen(unsigned long boot_phys_offset, > */ > system_state = SYS_STATE_boot; > > - vm_init(); > - > if ( acpi_disabled ) > { > printk("Booting using Device Tree\n"); > diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c > index 6bb5bc7c84be..1c2e09711eb0 100644 > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -870,6 +870,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) > unsigned long eb_start, eb_end; > bool acpi_boot_table_init_done = false, relocated = false; > int ret; > + bool vm_init_done = false; > struct ns16550_defaults ns16550 = { > .data_bits = 8, > .parity = 'n', > @@ -1442,12 +1443,23 @@ void __init noreturn __start_xen(unsigned long mbi_p) > continue; > > if ( !acpi_boot_table_init_done && > - s >= (1ULL << 32) && > - !acpi_boot_table_init() ) > + s >= (1ULL << 32) ) > { > - acpi_boot_table_init_done = true; > - srat_parse_regions(s); > - setup_max_pdx(raw_max_page); > + /* > + * We only initialise vmap and acpi after going through the bottom > + * 4GiB, so that we have enough pages in the boot allocator. > + */ > + if ( !vm_init_done ) > + { > + vm_init(); > + vm_init_done = true; > + } > + if ( !acpi_boot_table_init() ) > + { > + acpi_boot_table_init_done = true; > + srat_parse_regions(s); > + setup_max_pdx(raw_max_page); > + } > } > > if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx ) > @@ -1624,6 +1636,9 @@ void __init noreturn __start_xen(unsigned long mbi_p) > > init_frametable(); > > + if ( !vm_init_done ) > + vm_init(); > + > if ( !acpi_boot_table_init_done ) > acpi_boot_table_init(); > > @@ -1661,12 +1676,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) > end_boot_allocator(); > > system_state = SYS_STATE_boot; > - /* > - * No calls involving ACPI code should go between the setting of > - * SYS_STATE_boot and vm_init() (or else acpi_os_{,un}map_memory() > - * will break). > - */ > - vm_init(); > > bsp_stack = cpu_alloc_stack(0); > if ( !bsp_stack ) > diff --git a/xen/common/vmap.c b/xen/common/vmap.c > index 4fd6b3067ec1..1340c7c6faf6 100644 > --- a/xen/common/vmap.c > +++ b/xen/common/vmap.c > @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) > > for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) > { > - struct page_info *pg = alloc_domheap_page(NULL, 0); > + mfn_t mfn; > + int rc; > > - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); > + if ( system_state == SYS_STATE_early_boot ) > + mfn = alloc_boot_pages(1, 1); > + else > + { > + struct page_info *pg = alloc_domheap_page(NULL, 0); > + > + BUG_ON(!pg); > + mfn = page_to_mfn(pg); > + } > + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); > + BUG_ON(rc); > clear_page((void *)va); > } > bitmap_fill(vm_bitmap(type), vm_low[type]); > @@ -62,7 +73,7 @@ static void *vm_alloc(unsigned int nr, unsigned int align, > spin_lock(&vm_lock); > for ( ; ; ) > { > - struct page_info *pg; > + mfn_t mfn; > > ASSERT(vm_low[t] == vm_top[t] || !test_bit(vm_low[t], vm_bitmap(t))); > for ( start = vm_low[t]; start < vm_top[t]; ) > @@ -97,9 +108,16 @@ static void *vm_alloc(unsigned int nr, unsigned int align, > if ( vm_top[t] >= vm_end[t] ) > return NULL; > > - pg = alloc_domheap_page(NULL, 0); > - if ( !pg ) > - return NULL; > + if ( system_state == SYS_STATE_early_boot ) > + mfn = alloc_boot_pages(1, 1); > + else > + { > + struct page_info *pg = alloc_domheap_page(NULL, 0); > + > + if ( !pg ) > + return NULL; > + mfn = page_to_mfn(pg); > + } > > spin_lock(&vm_lock); > > @@ -107,7 +125,7 @@ static void *vm_alloc(unsigned int nr, unsigned int align, > { > unsigned long va = (unsigned long)vm_bitmap(t) + vm_top[t] / 8; > > - if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) ) > + if ( !map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR) ) > { > clear_page((void *)va); > vm_top[t] += PAGE_SIZE * 8; > @@ -117,7 +135,10 @@ static void *vm_alloc(unsigned int nr, unsigned int align, > } > } > > - free_domheap_page(pg); > + if ( system_state == SYS_STATE_early_boot ) > + init_boot_pages(mfn_to_maddr(mfn), mfn_to_maddr(mfn) + PAGE_SIZE); > + else > + free_domheap_page(mfn_to_page(mfn)); > > if ( start >= vm_top[t] ) > { > -- > 2.38.1 >
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c index 1f26f67b90e3..2311726f5ddd 100644 --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -1028,6 +1028,8 @@ void __init start_xen(unsigned long boot_phys_offset, setup_mm(); + vm_init(); + /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); @@ -1039,8 +1041,6 @@ void __init start_xen(unsigned long boot_phys_offset, */ system_state = SYS_STATE_boot; - vm_init(); - if ( acpi_disabled ) { printk("Booting using Device Tree\n"); diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 6bb5bc7c84be..1c2e09711eb0 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -870,6 +870,7 @@ void __init noreturn __start_xen(unsigned long mbi_p) unsigned long eb_start, eb_end; bool acpi_boot_table_init_done = false, relocated = false; int ret; + bool vm_init_done = false; struct ns16550_defaults ns16550 = { .data_bits = 8, .parity = 'n', @@ -1442,12 +1443,23 @@ void __init noreturn __start_xen(unsigned long mbi_p) continue; if ( !acpi_boot_table_init_done && - s >= (1ULL << 32) && - !acpi_boot_table_init() ) + s >= (1ULL << 32) ) { - acpi_boot_table_init_done = true; - srat_parse_regions(s); - setup_max_pdx(raw_max_page); + /* + * We only initialise vmap and acpi after going through the bottom + * 4GiB, so that we have enough pages in the boot allocator. + */ + if ( !vm_init_done ) + { + vm_init(); + vm_init_done = true; + } + if ( !acpi_boot_table_init() ) + { + acpi_boot_table_init_done = true; + srat_parse_regions(s); + setup_max_pdx(raw_max_page); + } } if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx ) @@ -1624,6 +1636,9 @@ void __init noreturn __start_xen(unsigned long mbi_p) init_frametable(); + if ( !vm_init_done ) + vm_init(); + if ( !acpi_boot_table_init_done ) acpi_boot_table_init(); @@ -1661,12 +1676,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) end_boot_allocator(); system_state = SYS_STATE_boot; - /* - * No calls involving ACPI code should go between the setting of - * SYS_STATE_boot and vm_init() (or else acpi_os_{,un}map_memory() - * will break). - */ - vm_init(); bsp_stack = cpu_alloc_stack(0); if ( !bsp_stack ) diff --git a/xen/common/vmap.c b/xen/common/vmap.c index 4fd6b3067ec1..1340c7c6faf6 100644 --- a/xen/common/vmap.c +++ b/xen/common/vmap.c @@ -34,9 +34,20 @@ void __init vm_init_type(enum vmap_region type, void *start, void *end) for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE ) { - struct page_info *pg = alloc_domheap_page(NULL, 0); + mfn_t mfn; + int rc; - map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); + if ( system_state == SYS_STATE_early_boot ) + mfn = alloc_boot_pages(1, 1); + else + { + struct page_info *pg = alloc_domheap_page(NULL, 0); + + BUG_ON(!pg); + mfn = page_to_mfn(pg); + } + rc = map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR); + BUG_ON(rc); clear_page((void *)va); } bitmap_fill(vm_bitmap(type), vm_low[type]); @@ -62,7 +73,7 @@ static void *vm_alloc(unsigned int nr, unsigned int align, spin_lock(&vm_lock); for ( ; ; ) { - struct page_info *pg; + mfn_t mfn; ASSERT(vm_low[t] == vm_top[t] || !test_bit(vm_low[t], vm_bitmap(t))); for ( start = vm_low[t]; start < vm_top[t]; ) @@ -97,9 +108,16 @@ static void *vm_alloc(unsigned int nr, unsigned int align, if ( vm_top[t] >= vm_end[t] ) return NULL; - pg = alloc_domheap_page(NULL, 0); - if ( !pg ) - return NULL; + if ( system_state == SYS_STATE_early_boot ) + mfn = alloc_boot_pages(1, 1); + else + { + struct page_info *pg = alloc_domheap_page(NULL, 0); + + if ( !pg ) + return NULL; + mfn = page_to_mfn(pg); + } spin_lock(&vm_lock); @@ -107,7 +125,7 @@ static void *vm_alloc(unsigned int nr, unsigned int align, { unsigned long va = (unsigned long)vm_bitmap(t) + vm_top[t] / 8; - if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) ) + if ( !map_pages_to_xen(va, mfn, 1, PAGE_HYPERVISOR) ) { clear_page((void *)va); vm_top[t] += PAGE_SIZE * 8; @@ -117,7 +135,10 @@ static void *vm_alloc(unsigned int nr, unsigned int align, } } - free_domheap_page(pg); + if ( system_state == SYS_STATE_early_boot ) + init_boot_pages(mfn_to_maddr(mfn), mfn_to_maddr(mfn) + PAGE_SIZE); + else + free_domheap_page(mfn_to_page(mfn)); if ( start >= vm_top[t] ) {