Message ID | aa67a0dacab0e3f39dabeb30e31732d617cadde1.1702607884.git.sanastasio@raptorengineering.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Early Boot Allocation on Power | expand |
On 15.12.2023 03:44, Shawn Anastasio wrote: > --- /dev/null > +++ b/xen/arch/ppc/include/asm/setup.h > @@ -0,0 +1,123 @@ > +#ifndef __ASM_PPC_SETUP_H__ > +#define __ASM_PPC_SETUP_H__ > + > +#define max_init_domid (0) > + > +#include <public/version.h> > +#include <asm/p2m.h> > +#include <xen/device_tree.h> > + > +#define MIN_FDT_ALIGN 8 > +#define MAX_FDT_SIZE SZ_2M > + > +#define NR_MEM_BANKS 256 > + > +#define MAX_MODULES 32 /* Current maximum useful modules */ > + > +typedef enum { > + BOOTMOD_XEN, > + BOOTMOD_FDT, > + BOOTMOD_KERNEL, > + BOOTMOD_RAMDISK, > + BOOTMOD_XSM, > + BOOTMOD_GUEST_DTB, > + BOOTMOD_UNKNOWN > +} bootmodule_kind; > + > +enum membank_type { > + /* > + * The MEMBANK_DEFAULT type refers to either reserved memory for the > + * device/firmware (when the bank is in 'reserved_mem') or any RAM (when > + * the bank is in 'mem'). > + */ > + MEMBANK_DEFAULT, > + /* > + * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory > + * bank is bound to a static Xen domain. It is only valid when the bank > + * is in reserved_mem. > + */ > + MEMBANK_STATIC_DOMAIN, > + /* > + * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory > + * bank is reserved as static heap. It is only valid when the bank is > + * in reserved_mem. > + */ > + MEMBANK_STATIC_HEAP, > +}; > + > +/* Indicates the maximum number of characters(\0 included) for shm_id */ > +#define MAX_SHM_ID_LENGTH 16 > + > +struct membank { > + paddr_t start; > + paddr_t size; > + enum membank_type type; > +}; > + > +struct meminfo { > + unsigned int nr_banks; > + struct membank bank[NR_MEM_BANKS]; > +}; > + > +/* > + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes. > + * The purpose of the domU flag is to avoid getting confused in > + * kernel_probe, where we try to guess which is the dom0 kernel and > + * initrd to be compatible with all versions of the multiboot spec. > + */ > +#define BOOTMOD_MAX_CMDLINE 1024 Why does this live here, rather than ... > +struct bootmodule { > + bootmodule_kind kind; > + bool domU; > + paddr_t start; > + paddr_t size; > +}; > + > +/* DT_MAX_NAME is the node name max length according the DT spec */ > +#define DT_MAX_NAME 41 ... next to this? > +struct bootcmdline { > + bootmodule_kind kind; > + bool domU; > + paddr_t start; > + char dt_name[DT_MAX_NAME]; > + char cmdline[BOOTMOD_MAX_CMDLINE]; > +}; > + > +struct bootmodules { > + int nr_mods; > + struct bootmodule module[MAX_MODULES]; > +}; > + > +struct bootcmdlines { > + unsigned int nr_mods; > + struct bootcmdline cmdline[MAX_MODULES]; > +}; > + > +struct bootinfo { > + struct meminfo mem; > + struct meminfo reserved_mem; > + struct bootmodules modules; > + struct bootcmdlines cmdlines; > + bool static_heap; Unused field? > +}; > + > +extern struct bootinfo bootinfo; > + > +/* > + * setup.c > + */ > + > +bool check_reserved_regions_overlap(paddr_t region_start, paddr_t region_size); > +struct bootmodule *add_boot_module(bootmodule_kind kind, > + paddr_t start, paddr_t size, bool domU); > +void add_boot_cmdline(const char *name, const char *cmdline, > + bootmodule_kind kind, paddr_t start, bool domU); > +const char *boot_module_kind_as_string(bootmodule_kind kind); > +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind); No __init please on declarations; section placement attributes make sense only on definitions (with pretty narrow exceptions). > --- a/xen/arch/ppc/setup.c > +++ b/xen/arch/ppc/setup.c > @@ -1,16 +1,296 @@ > /* SPDX-License-Identifier: GPL-2.0-or-later */ > #include <xen/init.h> > #include <xen/lib.h> > +#include <xen/libfdt/libfdt.h> > #include <xen/mm.h> > #include <public/version.h> > #include <asm/boot.h> > #include <asm/early_printk.h> > #include <asm/mm.h> > #include <asm/processor.h> > +#include <asm/setup.h> > > /* Xen stack for bringing up the first CPU. */ > unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); > > +struct bootinfo __initdata bootinfo; > + > +void __init add_boot_cmdline(const char *name, const char *cmdline, > + bootmodule_kind kind, paddr_t start, bool domU) > +{ > + struct bootcmdlines *cmds = &bootinfo.cmdlines; > + struct bootcmdline *cmd; > + > + if ( cmds->nr_mods == MAX_MODULES ) > + { > + printk("Ignoring %s cmdline (too many)\n", name); > + return; > + } > + > + cmd = &cmds->cmdline[cmds->nr_mods++]; > + cmd->kind = kind; > + cmd->domU = domU; > + cmd->start = start; > + > + ASSERT(strlen(name) <= DT_MAX_NAME); > + safe_strcpy(cmd->dt_name, name); > + > + if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE ) > + panic("module %s command line too long\n", name); > + safe_strcpy(cmd->cmdline, cmdline); > +} > + > +struct bootmodule __init *add_boot_module(bootmodule_kind kind, > + paddr_t start, paddr_t size, > + bool domU) > +{ > + struct bootmodules *mods = &bootinfo.modules; > + struct bootmodule *mod; > + unsigned int i; > + > + if ( mods->nr_mods == MAX_MODULES ) > + { > + printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too many)\n", > + boot_module_kind_as_string(kind), start, start + size); > + return NULL; > + } > + > + if ( check_reserved_regions_overlap(start, size) ) > + return NULL; > + > + for ( i = 0 ; i < mods->nr_mods ; i++ ) > + { > + mod = &mods->module[i]; > + if ( mod->kind == kind && mod->start == start ) > + { > + if ( !domU ) > + mod->domU = false; What's the intention behind this (negative) accumulation of "domU"? > + return mod; > + } > + } And what's the intention behind checking existing modules here (but not existing command lines higher up in add_boot_cmdline())? > + mod = &mods->module[mods->nr_mods++]; > + mod->kind = kind; > + mod->start = start; > + mod->size = size; > + mod->domU = domU; > + > + return mod; > +} > + > +const char * __init boot_module_kind_as_string(bootmodule_kind kind) > +{ > + switch ( kind ) > + { > + case BOOTMOD_XEN: return "Xen"; > + case BOOTMOD_FDT: return "Device Tree"; > + case BOOTMOD_KERNEL: return "Kernel"; > + default: BUG(); > + } > +} > + > +/* > + * TODO: '*_end' could be 0 if the module/region is at the end of the physical > + * address space. This is for now not handled as it requires more rework. > + */ > +static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, > + paddr_t region_start, > + paddr_t region_size) > +{ > + paddr_t mod_start = INVALID_PADDR, mod_end = 0; Pointless initializers? (The variables might benefit from moving into the more narrow scope anyway.) > + paddr_t region_end = region_start + region_size; > + unsigned int i, mod_num = bootmodules->nr_mods; > + > + for ( i = 0; i < mod_num; i++ ) > + { > + mod_start = bootmodules->module[i].start; > + mod_end = mod_start + bootmodules->module[i].size; > + > + if ( region_end <= mod_start || region_start >= mod_end ) > + continue; > + else I for one consider such an "else" misleading. > + { > + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" > + " mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, > + region_end, i, mod_start, mod_end); > + return true; > + } > + } > + > + return false; > +} > + > +/* > + * TODO: '*_end' could be 0 if the bank/region is at the end of the physical > + * address space. This is for now not handled as it requires more rework. > + */ > +static bool __init meminfo_overlap_check(struct meminfo *meminfo, > + paddr_t region_start, > + paddr_t region_size) > +{ > + paddr_t bank_start = INVALID_PADDR, bank_end = 0; As above: Pointless initializers? (The variables might benefit from moving into the more narrow scope anyway.) > + paddr_t region_end = region_start + region_size; > + unsigned int i, bank_num = meminfo->nr_banks; > + > + for ( i = 0; i < bank_num; i++ ) > + { > + bank_start = meminfo->bank[i].start; > + bank_end = bank_start + meminfo->bank[i].size; > + > + if ( region_end <= bank_start || region_start >= bank_end ) > + continue; > + else > + { > + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" > + " bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, > + region_end, i, bank_start, bank_end); I think this is confusing to read: When the format string already needs wrapping, I don't think the next argument should be put on the same line. I was almost going to complain that arguments don't fit the format string. > + return true; > + } > + } > + > + return false; > +} > + > +/* > + * Given an input physical address range, check if this range is overlapping > + * with the existing reserved memory regions defined in bootinfo. > + * Return true if the input physical address range is overlapping with any > + * existing reserved memory regions, otherwise false. > + */ > +bool __init check_reserved_regions_overlap(paddr_t region_start, > + paddr_t region_size) > +{ > + /* Check if input region is overlapping with bootinfo.reserved_mem banks */ > + if ( meminfo_overlap_check(&bootinfo.reserved_mem, > + region_start, region_size) ) > + return true; > + > + /* Check if input region is overlapping with bootmodules */ > + if ( bootmodules_overlap_check(&bootinfo.modules, > + region_start, region_size) ) > + return true; > + > + return false; > +} > + > +/* > + * Return the end of the non-module region starting at s. In other > + * words return s the start of the next modules after s. Stray " s" after "return"? > + * On input *end is the end of the region which should be considered > + * and it is updated to reflect the end of the module, clipped to the > + * end of the region if it would run over. > + */ > +static paddr_t __init next_module(paddr_t s, paddr_t *end) > +{ > + struct bootmodules *mi = &bootinfo.modules; const? > + paddr_t lowest = ~(paddr_t)0; > + int i; unsigned int? > + for ( i = 0; i < mi->nr_mods; i++ ) > + { > + paddr_t mod_s = mi->module[i].start; > + paddr_t mod_e = mod_s + mi->module[i].size; > + > + if ( !mi->module[i].size ) > + continue; > + > + if ( mod_s < s ) > + continue; > + if ( mod_s > lowest ) > + continue; > + if ( mod_s > *end ) > + continue; > + lowest = mod_s; > + *end = min(*end, mod_e); > + } > + return lowest; > +} > + > +static void __init dt_unreserved_regions(paddr_t s, paddr_t e, > + void (*cb)(paddr_t ps, paddr_t pe), > + unsigned int first) > +{ > + unsigned int i; > + > + for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) > + { > + paddr_t r_s = bootinfo.reserved_mem.bank[i].start; > + paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i].size; > + > + if ( s < r_e && r_s < e ) > + { > + dt_unreserved_regions(r_e, e, cb, i + 1); > + dt_unreserved_regions(s, r_s, cb, i + 1); What's the reason for this recursion? Can there be overlapping regions? If so, is there a guaranteed depth limit (seeing in particular that you're not using the last function parameter)? > + return; > + } > + } > + > + cb(s, e); > +} > + > +/* > + * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g > + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest > + * modules. > + */ > +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) This function looks to be unused? > +{ > + struct bootcmdlines *cmds = &bootinfo.cmdlines; > + struct bootcmdline *cmd; > + int i; unsigned int? > + for ( i = 0 ; i < cmds->nr_mods ; i++ ) > + { > + cmd = &cmds->cmdline[i]; > + if ( cmd->kind == kind && !cmd->domU ) > + return cmd; > + } > + return NULL; > +} > + > +/* > + * Populate the boot allocator. Based on arch/arm/setup.c's > + * populate_boot_allocator. > + * All RAM but the following regions will be added to the boot allocator: > + * - Modules (e.g., Xen, Kernel) > + * - Reserved regions > + */ > +static void __init populate_boot_allocator(void) > +{ > + unsigned int i; > + const struct meminfo *banks = &bootinfo.mem; > + paddr_t s, e; > + > + for ( i = 0; i < banks->nr_banks; i++ ) > + { > + const struct membank *bank = &banks->bank[i]; > + paddr_t bank_end = bank->start + bank->size; Since you already make use of inner scope variables, and reason not to put s here and ... > + s = bank->start; > + while ( s < bank_end ) > + { > + paddr_t n = bank_end; ... e even here (each then gaining an initializer as well)? > + e = next_module(s, &n); > + > + if ( e == ~(paddr_t)0 ) > + e = n = bank_end; > + > + /* > + * Module in a RAM bank other than the one which we are > + * not dealing with here. Nit: "not"? > + */ > + if ( e > bank_end ) > + e = bank_end; > + > + dt_unreserved_regions(s, e, init_boot_pages, 0); > + > + s = n; > + } > + } > +} > + > void setup_exceptions(void) > { > unsigned long lpcr; > @@ -24,6 +304,8 @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, > unsigned long r5, unsigned long r6, > unsigned long r7) > { > + void *boot_fdt; const? > @@ -32,11 +314,16 @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, > else > { > /* kexec boot protocol */ > - boot_opal_init((void *)r3); > + boot_fdt = (void *)r3; And then here as well, to avoid Misra complaints. > + boot_opal_init(boot_fdt); > } > > setup_exceptions(); > > + boot_fdt_info(boot_fdt, r3); > + > + populate_boot_allocator(); > + > setup_initial_pagetables(); > > early_printk("Hello, ppc64le!\n"); > --- a/xen/common/device-tree/bootfdt.c > +++ b/xen/common/device-tree/bootfdt.c > @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) > if ( ret < 0 ) > panic("No valid device tree\n"); > > - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); > - > ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, NULL); > if ( ret ) > panic("Early FDT parsing failed (%d)\n", ret); > > + /* > + * Add module for the FDT itself after the device tree has been parsed. This > + * is required on ppc64le where the device tree passed to Xen may have been > + * allocated by skiboot, in which case it will exist within a reserved > + * region and this call will fail. This is fine, however, since either way > + * the allocator will know not to step on the device tree. > + */ > + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); > + > + /* > + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark > + * the kernel module. > + */ > + if ( IS_ENABLED(CONFIG_PPC64) ) { Nit (style): Brace placement. > + paddr_t xen_start, xen_end; > + > + xen_start = __pa(_start); > + xen_end = PAGE_ALIGN(__pa(_end)); > + if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) ) > + panic("Xen overlaps reserved memory! %016lx - %016lx\n", xen_start, > + xen_end); > + } I'll need to leave commenting on this to the DT maintainers. Jan
Hi, On 15/12/2023 02:44, Shawn Anastasio wrote: > Move PPC off the asm-generic setup.h and enable usage of bootfdt for > populating the boot info struct from the firmware-provided device tree. > Also enable the Xen boot page allocator. > > Includes minor changes to bootfdt.c's boot_fdt_info() to tolerate the > scenario in which the FDT overlaps a reserved memory region, as is the > case on PPC when booted directly from skiboot. Also includes a minor > change to record Xen's correct position on PPC where Xen relocates > itself to at the entrypoint. > > Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> > --- > xen/arch/ppc/include/asm/Makefile | 1 - > xen/arch/ppc/include/asm/setup.h | 123 +++++++++++++ > xen/arch/ppc/setup.c | 289 +++++++++++++++++++++++++++++- I might be missing something. But isn't most of the code you add is the same as Arm? And if so, shouldn't this be consolidated? I would also expect RISC-V to need the same. Cheers,
Hi, On 15/12/2023 02:44, Shawn Anastasio wrote: > diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c > index 796ac01c18..7ddfcc7e2a 100644 > --- a/xen/common/device-tree/bootfdt.c > +++ b/xen/common/device-tree/bootfdt.c > @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) > if ( ret < 0 ) > panic("No valid device tree\n"); > > - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); > - > ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, NULL); > if ( ret ) > panic("Early FDT parsing failed (%d)\n", ret); > > + /* > + * Add module for the FDT itself after the device tree has been parsed. This > + * is required on ppc64le where the device tree passed to Xen may have been > + * allocated by skiboot, in which case it will exist within a reserved > + * region and this call will fail. This is fine, however, since either way > + * the allocator will know not to step on the device tree. > + */ > + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); The consequence is BOOTMOD_FDT will not be added. AFAICT, Arm doesn't try to access BOOTMOD_FDT, but I think it would be worth to print message. > + > + /* > + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark > + * the kernel module. > + */ > + if ( IS_ENABLED(CONFIG_PPC64) ) { > + paddr_t xen_start, xen_end; > + > + xen_start = __pa(_start); > + xen_end = PAGE_ALIGN(__pa(_end)); > + if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) ) > + panic("Xen overlaps reserved memory! %016lx - %016lx\n", xen_start, > + xen_end); > + } Can you explain why this is added here rather than in the caller of boot_fdt_info()? Either before or after? If after, I guess it is because of early_print_info(). In which case, I would suggest to move off early_print_info() to caller on each architecture. Cheers,
Hi Shawn, On 20/12/2023 13:23, Julien Grall wrote: > Hi, > > On 15/12/2023 02:44, Shawn Anastasio wrote: >> Move PPC off the asm-generic setup.h and enable usage of bootfdt for >> populating the boot info struct from the firmware-provided device tree. >> Also enable the Xen boot page allocator. >> >> Includes minor changes to bootfdt.c's boot_fdt_info() to tolerate the >> scenario in which the FDT overlaps a reserved memory region, as is the >> case on PPC when booted directly from skiboot. Also includes a minor >> change to record Xen's correct position on PPC where Xen relocates >> itself to at the entrypoint. >> >> Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> >> --- >> xen/arch/ppc/include/asm/Makefile | 1 - >> xen/arch/ppc/include/asm/setup.h | 123 +++++++++++++ >> xen/arch/ppc/setup.c | 289 +++++++++++++++++++++++++++++- > > I might be missing something. But isn't most of the code you add is the > same as Arm? And if so, shouldn't this be consolidated? So I have create a new file and move the PPC functions I think should be the same. Then I replace with the Arm code. Below the diff. Looking through it, I can't see why the code cannot be shared in except part of populate_boot_allocator() (which could be split). I forsee that some of the code you remove will be necessary (e.g. BOOTMOD_RAMDISK, BOOTMOD_XSM). And some of the style change doesn't match what we do in Xen (messages are not split). If there are common bits with other architectures, then I would really like if they are shared rather than duplicated. I am more than happy to help finding a good split because it will reduce maintenance effort for everyone in the longer run. Cheers, diff --git a/xen/common/device-tree/setup.c b/xen/common/device-tree/setup.c index 3f79b97e9036..9c376527d11b 100644 --- a/xen/common/device-tree/setup.c +++ b/xen/common/device-tree/setup.c @@ -70,6 +70,10 @@ const char * __init boot_module_kind_as_string(bootmodule_kind kind) case BOOTMOD_XEN: return "Xen"; case BOOTMOD_FDT: return "Device Tree"; case BOOTMOD_KERNEL: return "Kernel"; + case BOOTMOD_RAMDISK: return "Ramdisk"; + case BOOTMOD_XSM: return "XSM"; + case BOOTMOD_GUEST_DTB: return "DTB"; + case BOOTMOD_UNKNOWN: return "Unknown"; default: BUG(); } } @@ -95,9 +99,8 @@ static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, continue; else { - printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" - " mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, - region_end, i, mod_start, mod_end); + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", + region_start, region_end, i, mod_start, mod_end); return true; } } @@ -126,9 +129,8 @@ static bool __init meminfo_overlap_check(struct meminfo *meminfo, continue; else { - printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" - " bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, - region_end, i, bank_start, bank_end); + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", + region_start, region_end, i, bank_start, bank_end); return true; } } @@ -155,6 +157,12 @@ bool __init check_reserved_regions_overlap(paddr_t region_start, region_start, region_size) ) return true; +#ifdef CONFIG_ACPI + /* Check if input region is overlapping with ACPI EfiACPIReclaimMemory */ + if ( meminfo_overlap_check(&bootinfo.acpi, region_start, region_size) ) + return true; +#endif + return false; } @@ -196,12 +204,47 @@ static void __init dt_unreserved_regions(paddr_t s, paddr_t e, void (*cb)(paddr_t ps, paddr_t pe), unsigned int first) { - unsigned int i; + unsigned int i, nr; + int rc; + + rc = fdt_num_mem_rsv(device_tree_flattened); + if ( rc < 0 ) + panic("Unable to retrieve the number of reserved regions (rc=%d)\n", + rc); - for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) + nr = rc; + + for ( i = first; i < nr ; i++ ) { - paddr_t r_s = bootinfo.reserved_mem.bank[i].start; - paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i].size; + paddr_t r_s, r_e; + + if ( fdt_get_mem_rsv_paddr(device_tree_flattened, i, &r_s, &r_e ) < 0 ) + /* If we can't read it, pretend it doesn't exist... */ + continue; + + r_e += r_s; /* fdt_get_mem_rsv_paddr returns length */ + + if ( s < r_e && r_s < e ) + { + dt_unreserved_regions(r_e, e, cb, i+1); + dt_unreserved_regions(s, r_s, cb, i+1); + return; + } + } + + /* + * i is the current bootmodule we are evaluating across all possible + * kinds. + * + * When retrieving the corresponding reserved-memory addresses + * below, we need to index the bootinfo.reserved_mem bank starting + * from 0, and only counting the reserved-memory modules. Hence, + * we need to use i - nr. + */ + for ( ; i - nr < bootinfo.reserved_mem.nr_banks; i++ ) + { + paddr_t r_s = bootinfo.reserved_mem.bank[i - nr].start; + paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i - nr].size; if ( s < r_e && r_s < e ) { @@ -214,6 +257,16 @@ static void __init dt_unreserved_regions(paddr_t s, paddr_t e, cb(s, e); } +void __init fw_unreserved_regions(paddr_t s, paddr_t e, + void (*cb)(paddr_t ps, paddr_t pe), + unsigned int first) +{ + if ( acpi_disabled ) + dt_unreserved_regions(s, e, cb, first); + else + cb(s, e); +} + /* * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest @@ -235,18 +288,47 @@ struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) } /* - * Populate the boot allocator. Based on arch/arm/setup.c's - * populate_boot_allocator. - * All RAM but the following regions will be added to the boot allocator: + * Populate the boot allocator. + * If a static heap was not provided by the admin, all the RAM but the + * following regions will be added: * - Modules (e.g., Xen, Kernel) * - Reserved regions + * - Xenheap (arm32 only) + * If a static heap was provided by the admin, populate the boot + * allocator with the corresponding regions only, but with Xenheap excluded + * on arm32. */ -static void __init populate_boot_allocator(void) +void __init populate_boot_allocator(void) { unsigned int i; const struct meminfo *banks = &bootinfo.mem; paddr_t s, e; + if ( bootinfo.static_heap ) + { + for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) + { + if ( bootinfo.reserved_mem.bank[i].type != MEMBANK_STATIC_HEAP ) + continue; + + s = bootinfo.reserved_mem.bank[i].start; + e = s + bootinfo.reserved_mem.bank[i].size; +#ifdef CONFIG_ARM_32 + /* Avoid the xenheap, note that the xenheap cannot across a bank */ + if ( s <= mfn_to_maddr(directmap_mfn_start) && + e >= mfn_to_maddr(directmap_mfn_end) ) + { + init_boot_pages(s, mfn_to_maddr(directmap_mfn_start)); + init_boot_pages(mfn_to_maddr(directmap_mfn_end), e); + } + else +#endif + init_boot_pages(s, e); + } + + return; + } + for ( i = 0; i < banks->nr_banks; i++ ) { const struct membank *bank = &banks->bank[i]; @@ -269,11 +351,18 @@ static void __init populate_boot_allocator(void) if ( e > bank_end ) e = bank_end; - dt_unreserved_regions(s, e, init_boot_pages, 0); - +#ifdef CONFIG_ARM_32 + /* Avoid the xenheap */ + if ( s < mfn_to_maddr(directmap_mfn_end) && + mfn_to_maddr(directmap_mfn_start) < e ) + { + e = mfn_to_maddr(directmap_mfn_start); + n = mfn_to_maddr(directmap_mfn_end); + } +#endif + + fw_unreserved_regions(s, e, init_boot_pages, 0); s = n; } } } - - > > I would also expect RISC-V to need the same. > > Cheers, >
Hi Julien, On 12/20/23 7:49 AM, Julien Grall wrote: > Hi, > > On 15/12/2023 02:44, Shawn Anastasio wrote: >> diff --git a/xen/common/device-tree/bootfdt.c >> b/xen/common/device-tree/bootfdt.c >> index 796ac01c18..7ddfcc7e2a 100644 >> --- a/xen/common/device-tree/bootfdt.c >> +++ b/xen/common/device-tree/bootfdt.c >> @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, >> paddr_t paddr) >> if ( ret < 0 ) >> panic("No valid device tree\n"); >> - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); >> - >> ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, >> NULL); >> if ( ret ) >> panic("Early FDT parsing failed (%d)\n", ret); >> + /* >> + * Add module for the FDT itself after the device tree has been >> parsed. This >> + * is required on ppc64le where the device tree passed to Xen may >> have been >> + * allocated by skiboot, in which case it will exist within a >> reserved >> + * region and this call will fail. This is fine, however, since >> either way >> + * the allocator will know not to step on the device tree. >> + */ >> + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); > > The consequence is BOOTMOD_FDT will not be added. AFAICT, Arm doesn't > try to access BOOTMOD_FDT, but I think it would be worth to print message. > A message will already be printed by `meminfo_overlap_check` in setup.c when this condition is hit, like this: (XEN) Region: [0x0000003076e9b0, 0x00000030775215) overlapping with bank[3]: [0x00000030600000, 0x00000031000000) If you'd like me to add another more descriptive message here to let the user know that BOOTMOD_FDT wasn't added I could do that, though. >> + >> + /* >> + * Xen relocates itself at the ppc64 entrypoint, so we need to >> manually mark >> + * the kernel module. >> + */ >> + if ( IS_ENABLED(CONFIG_PPC64) ) { >> + paddr_t xen_start, xen_end; >> + >> + xen_start = __pa(_start); >> + xen_end = PAGE_ALIGN(__pa(_end)); >> + if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) ) >> + panic("Xen overlaps reserved memory! %016lx - %016lx\n", >> xen_start, >> + xen_end); >> + } > > Can you explain why this is added here rather than in the caller of > boot_fdt_info()? Either before or after? > > If after, I guess it is because of early_print_info(). In which case, I > would suggest to move off early_print_info() to caller on each > architecture. > Right, it can't be after due to the early_print_info() call, but doing it before actually works fine. Thank you for pointing that out -- I'll change this in the next revision. > Cheers, > Thanks, Shawn
Hi Shawn, On 18/01/2024 01:36, Shawn Anastasio wrote: > Hi Julien, > > On 12/20/23 7:49 AM, Julien Grall wrote: >> Hi, >> >> On 15/12/2023 02:44, Shawn Anastasio wrote: >>> diff --git a/xen/common/device-tree/bootfdt.c >>> b/xen/common/device-tree/bootfdt.c >>> index 796ac01c18..7ddfcc7e2a 100644 >>> --- a/xen/common/device-tree/bootfdt.c >>> +++ b/xen/common/device-tree/bootfdt.c >>> @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, >>> paddr_t paddr) >>> if ( ret < 0 ) >>> panic("No valid device tree\n"); >>> - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); >>> - >>> ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, >>> NULL); >>> if ( ret ) >>> panic("Early FDT parsing failed (%d)\n", ret); >>> + /* >>> + * Add module for the FDT itself after the device tree has been >>> parsed. This >>> + * is required on ppc64le where the device tree passed to Xen may >>> have been >>> + * allocated by skiboot, in which case it will exist within a >>> reserved >>> + * region and this call will fail. This is fine, however, since >>> either way >>> + * the allocator will know not to step on the device tree. >>> + */ >>> + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); >> >> The consequence is BOOTMOD_FDT will not be added. AFAICT, Arm doesn't >> try to access BOOTMOD_FDT, but I think it would be worth to print message. >> > > A message will already be printed by `meminfo_overlap_check` in setup.c > when this condition is hit, like this: > > (XEN) Region: [0x0000003076e9b0, 0x00000030775215) overlapping with > bank[3]: [0x00000030600000, 0x00000031000000) This is somewhat non-descriptive. IOW it doesn't tell me that the overlap was with the FDT. But I guess this can be improved (this is not a request for you to send another patch). Cheers,
diff --git a/xen/arch/ppc/include/asm/Makefile b/xen/arch/ppc/include/asm/Makefile index 7167661f86..a711cfa856 100644 --- a/xen/arch/ppc/include/asm/Makefile +++ b/xen/arch/ppc/include/asm/Makefile @@ -6,6 +6,5 @@ generic-y += iocap.h generic-y += paging.h generic-y += percpu.h generic-y += random.h -generic-y += setup.h generic-y += static-shmem.h generic-y += vm_event.h diff --git a/xen/arch/ppc/include/asm/setup.h b/xen/arch/ppc/include/asm/setup.h new file mode 100644 index 0000000000..c0f700fc07 --- /dev/null +++ b/xen/arch/ppc/include/asm/setup.h @@ -0,0 +1,123 @@ +#ifndef __ASM_PPC_SETUP_H__ +#define __ASM_PPC_SETUP_H__ + +#define max_init_domid (0) + +#include <public/version.h> +#include <asm/p2m.h> +#include <xen/device_tree.h> + +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#define NR_MEM_BANKS 256 + +#define MAX_MODULES 32 /* Current maximum useful modules */ + +typedef enum { + BOOTMOD_XEN, + BOOTMOD_FDT, + BOOTMOD_KERNEL, + BOOTMOD_RAMDISK, + BOOTMOD_XSM, + BOOTMOD_GUEST_DTB, + BOOTMOD_UNKNOWN +} bootmodule_kind; + +enum membank_type { + /* + * The MEMBANK_DEFAULT type refers to either reserved memory for the + * device/firmware (when the bank is in 'reserved_mem') or any RAM (when + * the bank is in 'mem'). + */ + MEMBANK_DEFAULT, + /* + * The MEMBANK_STATIC_DOMAIN type is used to indicate whether the memory + * bank is bound to a static Xen domain. It is only valid when the bank + * is in reserved_mem. + */ + MEMBANK_STATIC_DOMAIN, + /* + * The MEMBANK_STATIC_HEAP type is used to indicate whether the memory + * bank is reserved as static heap. It is only valid when the bank is + * in reserved_mem. + */ + MEMBANK_STATIC_HEAP, +}; + +/* Indicates the maximum number of characters(\0 included) for shm_id */ +#define MAX_SHM_ID_LENGTH 16 + +struct membank { + paddr_t start; + paddr_t size; + enum membank_type type; +}; + +struct meminfo { + unsigned int nr_banks; + struct membank bank[NR_MEM_BANKS]; +}; + +/* + * The domU flag is set for kernels and ramdisks of "xen,domain" nodes. + * The purpose of the domU flag is to avoid getting confused in + * kernel_probe, where we try to guess which is the dom0 kernel and + * initrd to be compatible with all versions of the multiboot spec. + */ +#define BOOTMOD_MAX_CMDLINE 1024 +struct bootmodule { + bootmodule_kind kind; + bool domU; + paddr_t start; + paddr_t size; +}; + +/* DT_MAX_NAME is the node name max length according the DT spec */ +#define DT_MAX_NAME 41 +struct bootcmdline { + bootmodule_kind kind; + bool domU; + paddr_t start; + char dt_name[DT_MAX_NAME]; + char cmdline[BOOTMOD_MAX_CMDLINE]; +}; + +struct bootmodules { + int nr_mods; + struct bootmodule module[MAX_MODULES]; +}; + +struct bootcmdlines { + unsigned int nr_mods; + struct bootcmdline cmdline[MAX_MODULES]; +}; + +struct bootinfo { + struct meminfo mem; + struct meminfo reserved_mem; + struct bootmodules modules; + struct bootcmdlines cmdlines; + bool static_heap; +}; + +extern struct bootinfo bootinfo; + +/* + * setup.c + */ + +bool check_reserved_regions_overlap(paddr_t region_start, paddr_t region_size); +struct bootmodule *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, bool domU); +void add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU); +const char *boot_module_kind_as_string(bootmodule_kind kind); +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind); + +/* + * bootfdt.c + */ +size_t boot_fdt_info(const void *fdt, paddr_t paddr); + +#endif /* __ASM_PPC_SETUP_H__ */ diff --git a/xen/arch/ppc/setup.c b/xen/arch/ppc/setup.c index 101bdd8bb6..143f2e449e 100644 --- a/xen/arch/ppc/setup.c +++ b/xen/arch/ppc/setup.c @@ -1,16 +1,296 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include <xen/init.h> #include <xen/lib.h> +#include <xen/libfdt/libfdt.h> #include <xen/mm.h> #include <public/version.h> #include <asm/boot.h> #include <asm/early_printk.h> #include <asm/mm.h> #include <asm/processor.h> +#include <asm/setup.h> /* Xen stack for bringing up the first CPU. */ unsigned char __initdata cpu0_boot_stack[STACK_SIZE] __aligned(STACK_SIZE); +struct bootinfo __initdata bootinfo; + +void __init add_boot_cmdline(const char *name, const char *cmdline, + bootmodule_kind kind, paddr_t start, bool domU) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + + if ( cmds->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s cmdline (too many)\n", name); + return; + } + + cmd = &cmds->cmdline[cmds->nr_mods++]; + cmd->kind = kind; + cmd->domU = domU; + cmd->start = start; + + ASSERT(strlen(name) <= DT_MAX_NAME); + safe_strcpy(cmd->dt_name, name); + + if ( strlen(cmdline) > BOOTMOD_MAX_CMDLINE ) + panic("module %s command line too long\n", name); + safe_strcpy(cmd->cmdline, cmdline); +} + +struct bootmodule __init *add_boot_module(bootmodule_kind kind, + paddr_t start, paddr_t size, + bool domU) +{ + struct bootmodules *mods = &bootinfo.modules; + struct bootmodule *mod; + unsigned int i; + + if ( mods->nr_mods == MAX_MODULES ) + { + printk("Ignoring %s boot module at %"PRIpaddr"-%"PRIpaddr" (too many)\n", + boot_module_kind_as_string(kind), start, start + size); + return NULL; + } + + if ( check_reserved_regions_overlap(start, size) ) + return NULL; + + for ( i = 0 ; i < mods->nr_mods ; i++ ) + { + mod = &mods->module[i]; + if ( mod->kind == kind && mod->start == start ) + { + if ( !domU ) + mod->domU = false; + return mod; + } + } + + mod = &mods->module[mods->nr_mods++]; + mod->kind = kind; + mod->start = start; + mod->size = size; + mod->domU = domU; + + return mod; +} + +const char * __init boot_module_kind_as_string(bootmodule_kind kind) +{ + switch ( kind ) + { + case BOOTMOD_XEN: return "Xen"; + case BOOTMOD_FDT: return "Device Tree"; + case BOOTMOD_KERNEL: return "Kernel"; + default: BUG(); + } +} + +/* + * TODO: '*_end' could be 0 if the module/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. + */ +static bool __init bootmodules_overlap_check(struct bootmodules *bootmodules, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t mod_start = INVALID_PADDR, mod_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, mod_num = bootmodules->nr_mods; + + for ( i = 0; i < mod_num; i++ ) + { + mod_start = bootmodules->module[i].start; + mod_end = mod_start + bootmodules->module[i].size; + + if ( region_end <= mod_start || region_start >= mod_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" + " mod[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, + region_end, i, mod_start, mod_end); + return true; + } + } + + return false; +} + +/* + * TODO: '*_end' could be 0 if the bank/region is at the end of the physical + * address space. This is for now not handled as it requires more rework. + */ +static bool __init meminfo_overlap_check(struct meminfo *meminfo, + paddr_t region_start, + paddr_t region_size) +{ + paddr_t bank_start = INVALID_PADDR, bank_end = 0; + paddr_t region_end = region_start + region_size; + unsigned int i, bank_num = meminfo->nr_banks; + + for ( i = 0; i < bank_num; i++ ) + { + bank_start = meminfo->bank[i].start; + bank_end = bank_start + meminfo->bank[i].size; + + if ( region_end <= bank_start || region_start >= bank_end ) + continue; + else + { + printk("Region: [%#"PRIpaddr", %#"PRIpaddr") overlapping with" + " bank[%u]: [%#"PRIpaddr", %#"PRIpaddr")\n", region_start, + region_end, i, bank_start, bank_end); + return true; + } + } + + return false; +} + +/* + * Given an input physical address range, check if this range is overlapping + * with the existing reserved memory regions defined in bootinfo. + * Return true if the input physical address range is overlapping with any + * existing reserved memory regions, otherwise false. + */ +bool __init check_reserved_regions_overlap(paddr_t region_start, + paddr_t region_size) +{ + /* Check if input region is overlapping with bootinfo.reserved_mem banks */ + if ( meminfo_overlap_check(&bootinfo.reserved_mem, + region_start, region_size) ) + return true; + + /* Check if input region is overlapping with bootmodules */ + if ( bootmodules_overlap_check(&bootinfo.modules, + region_start, region_size) ) + return true; + + return false; +} + +/* + * Return the end of the non-module region starting at s. In other + * words return s the start of the next modules after s. + * + * On input *end is the end of the region which should be considered + * and it is updated to reflect the end of the module, clipped to the + * end of the region if it would run over. + */ +static paddr_t __init next_module(paddr_t s, paddr_t *end) +{ + struct bootmodules *mi = &bootinfo.modules; + paddr_t lowest = ~(paddr_t)0; + int i; + + for ( i = 0; i < mi->nr_mods; i++ ) + { + paddr_t mod_s = mi->module[i].start; + paddr_t mod_e = mod_s + mi->module[i].size; + + if ( !mi->module[i].size ) + continue; + + if ( mod_s < s ) + continue; + if ( mod_s > lowest ) + continue; + if ( mod_s > *end ) + continue; + lowest = mod_s; + *end = min(*end, mod_e); + } + return lowest; +} + +static void __init dt_unreserved_regions(paddr_t s, paddr_t e, + void (*cb)(paddr_t ps, paddr_t pe), + unsigned int first) +{ + unsigned int i; + + for ( i = 0 ; i < bootinfo.reserved_mem.nr_banks; i++ ) + { + paddr_t r_s = bootinfo.reserved_mem.bank[i].start; + paddr_t r_e = r_s + bootinfo.reserved_mem.bank[i].size; + + if ( s < r_e && r_s < e ) + { + dt_unreserved_regions(r_e, e, cb, i + 1); + dt_unreserved_regions(s, r_s, cb, i + 1); + return; + } + } + + cb(s, e); +} + +/* + * boot_cmdline_find_by_kind can only be used to return Xen modules (e.g + * XSM, DTB) or Dom0 modules. This is not suitable for looking up guest + * modules. + */ +struct bootcmdline * __init boot_cmdline_find_by_kind(bootmodule_kind kind) +{ + struct bootcmdlines *cmds = &bootinfo.cmdlines; + struct bootcmdline *cmd; + int i; + + for ( i = 0 ; i < cmds->nr_mods ; i++ ) + { + cmd = &cmds->cmdline[i]; + if ( cmd->kind == kind && !cmd->domU ) + return cmd; + } + return NULL; +} + +/* + * Populate the boot allocator. Based on arch/arm/setup.c's + * populate_boot_allocator. + * All RAM but the following regions will be added to the boot allocator: + * - Modules (e.g., Xen, Kernel) + * - Reserved regions + */ +static void __init populate_boot_allocator(void) +{ + unsigned int i; + const struct meminfo *banks = &bootinfo.mem; + paddr_t s, e; + + for ( i = 0; i < banks->nr_banks; i++ ) + { + const struct membank *bank = &banks->bank[i]; + paddr_t bank_end = bank->start + bank->size; + + s = bank->start; + while ( s < bank_end ) + { + paddr_t n = bank_end; + + e = next_module(s, &n); + + if ( e == ~(paddr_t)0 ) + e = n = bank_end; + + /* + * Module in a RAM bank other than the one which we are + * not dealing with here. + */ + if ( e > bank_end ) + e = bank_end; + + dt_unreserved_regions(s, e, init_boot_pages, 0); + + s = n; + } + } +} + void setup_exceptions(void) { unsigned long lpcr; @@ -24,6 +304,8 @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7) { + void *boot_fdt; + if ( r5 ) { /* Unsupported OpenFirmware boot protocol */ @@ -32,11 +314,16 @@ void __init noreturn start_xen(unsigned long r3, unsigned long r4, else { /* kexec boot protocol */ - boot_opal_init((void *)r3); + boot_fdt = (void *)r3; + boot_opal_init(boot_fdt); } setup_exceptions(); + boot_fdt_info(boot_fdt, r3); + + populate_boot_allocator(); + setup_initial_pagetables(); early_printk("Hello, ppc64le!\n"); diff --git a/xen/common/device-tree/bootfdt.c b/xen/common/device-tree/bootfdt.c index 796ac01c18..7ddfcc7e2a 100644 --- a/xen/common/device-tree/bootfdt.c +++ b/xen/common/device-tree/bootfdt.c @@ -543,12 +543,33 @@ size_t __init boot_fdt_info(const void *fdt, paddr_t paddr) if ( ret < 0 ) panic("No valid device tree\n"); - add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); - ret = device_tree_for_each_node((void *)fdt, 0, early_scan_node, NULL); if ( ret ) panic("Early FDT parsing failed (%d)\n", ret); + /* + * Add module for the FDT itself after the device tree has been parsed. This + * is required on ppc64le where the device tree passed to Xen may have been + * allocated by skiboot, in which case it will exist within a reserved + * region and this call will fail. This is fine, however, since either way + * the allocator will know not to step on the device tree. + */ + add_boot_module(BOOTMOD_FDT, paddr, fdt_totalsize(fdt), false); + + /* + * Xen relocates itself at the ppc64 entrypoint, so we need to manually mark + * the kernel module. + */ + if ( IS_ENABLED(CONFIG_PPC64) ) { + paddr_t xen_start, xen_end; + + xen_start = __pa(_start); + xen_end = PAGE_ALIGN(__pa(_end)); + if ( !add_boot_module(BOOTMOD_XEN, xen_start, xen_end, false) ) + panic("Xen overlaps reserved memory! %016lx - %016lx\n", xen_start, + xen_end); + } + /* * On Arm64 setup_directmap_mappings() expects to be called with the lowest * bank in memory first. There is no requirement that the DT will provide
Move PPC off the asm-generic setup.h and enable usage of bootfdt for populating the boot info struct from the firmware-provided device tree. Also enable the Xen boot page allocator. Includes minor changes to bootfdt.c's boot_fdt_info() to tolerate the scenario in which the FDT overlaps a reserved memory region, as is the case on PPC when booted directly from skiboot. Also includes a minor change to record Xen's correct position on PPC where Xen relocates itself to at the entrypoint. Signed-off-by: Shawn Anastasio <sanastasio@raptorengineering.com> --- xen/arch/ppc/include/asm/Makefile | 1 - xen/arch/ppc/include/asm/setup.h | 123 +++++++++++++ xen/arch/ppc/setup.c | 289 +++++++++++++++++++++++++++++- xen/common/device-tree/bootfdt.c | 25 ++- 4 files changed, 434 insertions(+), 4 deletions(-) create mode 100644 xen/arch/ppc/include/asm/setup.h