Message ID | 20220227030717.1464-4-thunder.leizhen@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | support reserving crashkernel above 4G on arm64 kdump | expand |
On 02/27/22 at 11:07am, Zhen Lei wrote: > From: Chen Zhou <chenzhou10@huawei.com> > > There are following issues in arm64 kdump: > 1. We use crashkernel=X to reserve crashkernel below 4G, which > will fail when there is no enough low memory. > 2. If reserving crashkernel above 4G, in this case, crash dump > kernel will boot failure because there is no low memory available > for allocation. > > To solve these issues, change the behavior of crashkernel=X and > introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation > in DMA zone, and fall back to high allocation if it fails. > We can also use "crashkernel=X,high" to select a region above DMA zone, > which also tries to allocate at least 256M in DMA zone automatically. > "crashkernel=Y,low" can be used to allocate specified size low memory. > > Signed-off-by: Chen Zhou <chenzhou10@huawei.com> > Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> > Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> > --- > arch/arm64/kernel/machine_kexec.c | 9 ++- > arch/arm64/kernel/machine_kexec_file.c | 12 ++- > arch/arm64/mm/init.c | 106 +++++++++++++++++++++++-- > 3 files changed, 115 insertions(+), 12 deletions(-) > > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > index e16b248699d5c3c..19c2d487cb08feb 100644 > --- a/arch/arm64/kernel/machine_kexec.c > +++ b/arch/arm64/kernel/machine_kexec.c > @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) > > /* in reserved memory? */ > addr = __pfn_to_phys(pfn); > - if ((addr < crashk_res.start) || (crashk_res.end < addr)) > - return false; > + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { > + if (!crashk_low_res.end) > + return false; > + > + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) > + return false; > + } > > if (!kexec_crash_image) > return true; > diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c > index 59c648d51848886..889951291cc0f9c 100644 > --- a/arch/arm64/kernel/machine_kexec_file.c > +++ b/arch/arm64/kernel/machine_kexec_file.c > @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) > > /* Exclude crashkernel region */ > ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); > + if (ret) > + goto out; > + > + if (crashk_low_res.end) { > + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); > + if (ret) > + goto out; > + } > > - if (!ret) > - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); > + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); > > +out: > kfree(cmem); > return ret; > } > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 90f276d46b93bc6..30ae6638ff54c47 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr); > phys_addr_t arm64_dma_phys_limit __ro_after_init; > > #ifdef CONFIG_KEXEC_CORE > +/* Current arm64 boot protocol requires 2MB alignment */ > +#define CRASH_ALIGN SZ_2M > + > +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit > +#define CRASH_ADDR_HIGH_MAX memblock.current_limit > + > +/* > + * This is an empirical value in x86_64 and taken here directly. Please > + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more > + * details. > + */ > +#define DEFAULT_CRASH_KERNEL_LOW_SIZE \ > + max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20) > + > +static int __init reserve_crashkernel_low(unsigned long long low_size) > +{ > + unsigned long long low_base; > + > + /* passed with crashkernel=0,low ? */ > + if (!low_size) > + return 0; > + > + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); > + if (!low_base) { > + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); > + return -ENOMEM; > + } > + > + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", > + low_base, low_base + low_size, low_size >> 20); > + > + crashk_low_res.start = low_base; > + crashk_low_res.end = low_base + low_size - 1; > + insert_resource(&iomem_resource, &crashk_low_res); > + > + return 0; > +} > + > /* > * reserve_crashkernel() - reserves memory for crash kernel > * > @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; > static void __init reserve_crashkernel(void) > { > unsigned long long crash_base, crash_size; > - unsigned long long crash_max = arm64_dma_phys_limit; > + unsigned long long crash_low_size; > + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; > int ret; Even though reverse xmas tree style is not enforced, this 'int ret;' is really annoying to look at. Maybe move it down two lines. > + bool fixed_base, high = false; > + char *cmdline = boot_command_line; > > - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), > + /* crashkernel=X[@offset] */ > + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), > &crash_size, &crash_base); > - /* no crashkernel= or invalid value specified */ > - if (ret || !crash_size) > - return; > + if (ret || !crash_size) { > + /* crashkernel=X,high */ > + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); > + if (ret || !crash_size) > + return; > + > + /* crashkernel=Y,low */ > + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); > + if (ret == -ENOENT) > + /* > + * crashkernel=Y,low is not specified explicitly, use > + * default size automatically. > + */ > + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > + else if (ret) > + /* crashkernel=Y,low is specified but Y is invalid */ > + return; > + > + /* Mark crashkernel=X,high is specified */ > + high = true; > + crash_max = CRASH_ADDR_HIGH_MAX; > + } > > + fixed_base = !!crash_base; > crash_size = PAGE_ALIGN(crash_size); > > /* User specifies base address explicitly. */ This is over commenting, can't see why it's needed. > - if (crash_base) > + if (fixed_base) > crash_max = crash_base + crash_size; Hi leizhen, I made change on reserve_crashkenrel(), inline comment may be slow. Please check and consider if they can be taken. diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 30ae6638ff54..f96351da1e3e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) * This function reserves memory area given in "crashkernel=" kernel command * line parameter. The memory reserved is used by dump capture kernel when * primary kernel is crashing. + * + * NOTE: Reservation of crashkernel,low is special since its existence + * is not independent, need rely on the existence of crashkernel,high. + * Hence there are different cases for crashkernel,low reservation: + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, + * take the default crashkernel,low value; + * 3) crashkernel=X is specified, while fallback to get a memory region + * in high memory, take the default crashkernel,low value; + * 4) crashkernel='invalid value',low is specified, failed the whole + * crashkernel reservation and bail out. */ static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; unsigned long long crash_low_size; unsigned long long crash_max = CRASH_ADDR_LOW_MAX; - int ret; bool fixed_base, high = false; char *cmdline = boot_command_line; + int ret; /* crashkernel=X[@offset] */ ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret || !crash_size) { - /* crashkernel=X,high */ ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); if (ret || !crash_size) return; - /* crashkernel=Y,low */ ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); if (ret == -ENOENT) - /* - * crashkernel=Y,low is not specified explicitly, use - * default size automatically. - */ + /* case #2 of crashkernel,low reservation */ crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; else if (ret) - /* crashkernel=Y,low is specified but Y is invalid */ + /* case #4 of crashkernel,low reservation */ return; - /* Mark crashkernel=X,high is specified */ high = true; crash_max = CRASH_ADDR_HIGH_MAX; } @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) fixed_base = !!crash_base; crash_size = PAGE_ALIGN(crash_size); - /* User specifies base address explicitly. */ if (fixed_base) crash_max = crash_base + crash_size; @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) } if (crash_base >= SZ_4G) { - /* - * For case crashkernel=X, low memory is not enough and fall - * back to reserve specified size of memory above 4G, try to - * allocate minimum required memory below 4G again. - */ + /* case #3 of crashkernel,low reservation */ if (!high) crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > > - /* Current arm64 boot protocol requires 2MB alignment */ > - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, > +retry: > + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, > crash_base, crash_max); > if (!crash_base) { > + /* > + * Attempt to fully allocate low memory failed, fall back > + * to high memory, the minimum required low memory will be > + * reserved later. > + */ > + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { > + crash_max = CRASH_ADDR_HIGH_MAX; > + goto retry; > + } > + > pr_warn("cannot allocate crashkernel (size:0x%llx)\n", > crash_size); > return; > } > > + if (crash_base >= SZ_4G) { > + /* > + * For case crashkernel=X, low memory is not enough and fall > + * back to reserve specified size of memory above 4G, try to > + * allocate minimum required memory below 4G again. > + */ > + if (!high) > + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > + > + if (reserve_crashkernel_low(crash_low_size)) { > + memblock_phys_free(crash_base, crash_size); > + return; > + } > + } > + > pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", > crash_base, crash_base + crash_size, crash_size >> 20); > > @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) > * map. Inform kmemleak so that it won't try to access it. > */ > kmemleak_ignore_phys(crash_base); > + if (crashk_low_res.end) > + kmemleak_ignore_phys(crashk_low_res.start); > + > crashk_res.start = crash_base; > crashk_res.end = crash_base + crash_size - 1; > insert_resource(&iomem_resource, &crashk_res); > -- > 2.25.1 >
On 2022/3/16 20:11, Baoquan He wrote: > On 02/27/22 at 11:07am, Zhen Lei wrote: >> From: Chen Zhou <chenzhou10@huawei.com> >> >> There are following issues in arm64 kdump: >> 1. We use crashkernel=X to reserve crashkernel below 4G, which >> will fail when there is no enough low memory. >> 2. If reserving crashkernel above 4G, in this case, crash dump >> kernel will boot failure because there is no low memory available >> for allocation. >> >> To solve these issues, change the behavior of crashkernel=X and >> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation >> in DMA zone, and fall back to high allocation if it fails. >> We can also use "crashkernel=X,high" to select a region above DMA zone, >> which also tries to allocate at least 256M in DMA zone automatically. >> "crashkernel=Y,low" can be used to allocate specified size low memory. >> >> Signed-off-by: Chen Zhou <chenzhou10@huawei.com> >> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> >> --- >> arch/arm64/kernel/machine_kexec.c | 9 ++- >> arch/arm64/kernel/machine_kexec_file.c | 12 ++- >> arch/arm64/mm/init.c | 106 +++++++++++++++++++++++-- >> 3 files changed, 115 insertions(+), 12 deletions(-) >> >> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c >> index e16b248699d5c3c..19c2d487cb08feb 100644 >> --- a/arch/arm64/kernel/machine_kexec.c >> +++ b/arch/arm64/kernel/machine_kexec.c >> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) >> >> /* in reserved memory? */ >> addr = __pfn_to_phys(pfn); >> - if ((addr < crashk_res.start) || (crashk_res.end < addr)) >> - return false; >> + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { >> + if (!crashk_low_res.end) >> + return false; >> + >> + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) >> + return false; >> + } >> >> if (!kexec_crash_image) >> return true; >> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c >> index 59c648d51848886..889951291cc0f9c 100644 >> --- a/arch/arm64/kernel/machine_kexec_file.c >> +++ b/arch/arm64/kernel/machine_kexec_file.c >> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) >> >> /* Exclude crashkernel region */ >> ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); >> + if (ret) >> + goto out; >> + >> + if (crashk_low_res.end) { >> + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); >> + if (ret) >> + goto out; >> + } >> >> - if (!ret) >> - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); >> + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); >> >> +out: >> kfree(cmem); >> return ret; >> } >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >> index 90f276d46b93bc6..30ae6638ff54c47 100644 >> --- a/arch/arm64/mm/init.c >> +++ b/arch/arm64/mm/init.c >> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr); >> phys_addr_t arm64_dma_phys_limit __ro_after_init; >> >> #ifdef CONFIG_KEXEC_CORE >> +/* Current arm64 boot protocol requires 2MB alignment */ >> +#define CRASH_ALIGN SZ_2M >> + >> +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit >> +#define CRASH_ADDR_HIGH_MAX memblock.current_limit >> + >> +/* >> + * This is an empirical value in x86_64 and taken here directly. Please >> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more >> + * details. >> + */ >> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE \ >> + max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20) >> + >> +static int __init reserve_crashkernel_low(unsigned long long low_size) >> +{ >> + unsigned long long low_base; >> + >> + /* passed with crashkernel=0,low ? */ >> + if (!low_size) >> + return 0; >> + >> + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); >> + if (!low_base) { >> + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); >> + return -ENOMEM; >> + } >> + >> + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", >> + low_base, low_base + low_size, low_size >> 20); >> + >> + crashk_low_res.start = low_base; >> + crashk_low_res.end = low_base + low_size - 1; >> + insert_resource(&iomem_resource, &crashk_low_res); >> + >> + return 0; >> +} >> + >> /* >> * reserve_crashkernel() - reserves memory for crash kernel >> * >> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; >> static void __init reserve_crashkernel(void) >> { >> unsigned long long crash_base, crash_size; >> - unsigned long long crash_max = arm64_dma_phys_limit; >> + unsigned long long crash_low_size; >> + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; >> int ret; > > Even though reverse xmas tree style is not enforced, this 'int ret;' is > really annoying to look at. Maybe move it down two lines. > >> + bool fixed_base, high = false; >> + char *cmdline = boot_command_line; >> >> - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), >> + /* crashkernel=X[@offset] */ >> + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), >> &crash_size, &crash_base); >> - /* no crashkernel= or invalid value specified */ >> - if (ret || !crash_size) >> - return; >> + if (ret || !crash_size) { >> + /* crashkernel=X,high */ >> + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); >> + if (ret || !crash_size) >> + return; >> + >> + /* crashkernel=Y,low */ >> + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); >> + if (ret == -ENOENT) >> + /* >> + * crashkernel=Y,low is not specified explicitly, use >> + * default size automatically. >> + */ >> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >> + else if (ret) >> + /* crashkernel=Y,low is specified but Y is invalid */ >> + return; >> + >> + /* Mark crashkernel=X,high is specified */ >> + high = true; >> + crash_max = CRASH_ADDR_HIGH_MAX; >> + } >> >> + fixed_base = !!crash_base; >> crash_size = PAGE_ALIGN(crash_size); >> >> /* User specifies base address explicitly. */ > This is over commenting, can't see why it's needed. >> - if (crash_base) >> + if (fixed_base) >> crash_max = crash_base + crash_size; > > Hi leizhen, > > I made change on reserve_crashkenrel(), inline comment may be slow. > Please check and consider if they can be taken. That's great. Thank you very much. > > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 30ae6638ff54..f96351da1e3e 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) > * This function reserves memory area given in "crashkernel=" kernel command > * line parameter. The memory reserved is used by dump capture kernel when > * primary kernel is crashing. > + * > + * NOTE: Reservation of crashkernel,low is special since its existence > + * is not independent, need rely on the existence of crashkernel,high. > + * Hence there are different cases for crashkernel,low reservation: > + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; > + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, > + * take the default crashkernel,low value; > + * 3) crashkernel=X is specified, while fallback to get a memory region > + * in high memory, take the default crashkernel,low value; > + * 4) crashkernel='invalid value',low is specified, failed the whole > + * crashkernel reservation and bail out. > */ > static void __init reserve_crashkernel(void) > { > unsigned long long crash_base, crash_size; > unsigned long long crash_low_size; > unsigned long long crash_max = CRASH_ADDR_LOW_MAX; > - int ret; > bool fixed_base, high = false; > char *cmdline = boot_command_line; > + int ret; > > /* crashkernel=X[@offset] */ > ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), > &crash_size, &crash_base); > if (ret || !crash_size) { > - /* crashkernel=X,high */ > ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); > if (ret || !crash_size) > return; > > - /* crashkernel=Y,low */ > ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); > if (ret == -ENOENT) > - /* > - * crashkernel=Y,low is not specified explicitly, use > - * default size automatically. > - */ > + /* case #2 of crashkernel,low reservation */ > crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > else if (ret) > - /* crashkernel=Y,low is specified but Y is invalid */ > + /* case #4 of crashkernel,low reservation */ > return; > > - /* Mark crashkernel=X,high is specified */ > high = true; > crash_max = CRASH_ADDR_HIGH_MAX; > } > @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) > fixed_base = !!crash_base; > crash_size = PAGE_ALIGN(crash_size); > > - /* User specifies base address explicitly. */ > if (fixed_base) > crash_max = crash_base + crash_size; > > @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) > } > > if (crash_base >= SZ_4G) { > - /* > - * For case crashkernel=X, low memory is not enough and fall > - * back to reserve specified size of memory above 4G, try to > - * allocate minimum required memory below 4G again. > - */ > + /* case #3 of crashkernel,low reservation */ > if (!high) > crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > > >> >> - /* Current arm64 boot protocol requires 2MB alignment */ >> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, >> +retry: >> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, >> crash_base, crash_max); >> if (!crash_base) { >> + /* >> + * Attempt to fully allocate low memory failed, fall back >> + * to high memory, the minimum required low memory will be >> + * reserved later. >> + */ >> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { >> + crash_max = CRASH_ADDR_HIGH_MAX; >> + goto retry; >> + } >> + >> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", >> crash_size); >> return; >> } >> >> + if (crash_base >= SZ_4G) { >> + /* >> + * For case crashkernel=X, low memory is not enough and fall >> + * back to reserve specified size of memory above 4G, try to >> + * allocate minimum required memory below 4G again. >> + */ >> + if (!high) >> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >> + >> + if (reserve_crashkernel_low(crash_low_size)) { >> + memblock_phys_free(crash_base, crash_size); >> + return; >> + } >> + } >> + >> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", >> crash_base, crash_base + crash_size, crash_size >> 20); >> >> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) >> * map. Inform kmemleak so that it won't try to access it. >> */ >> kmemleak_ignore_phys(crash_base); >> + if (crashk_low_res.end) >> + kmemleak_ignore_phys(crashk_low_res.start); >> + >> crashk_res.start = crash_base; >> crashk_res.end = crash_base + crash_size - 1; >> insert_resource(&iomem_resource, &crashk_res); >> -- >> 2.25.1 >> > > . >
On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote: > > > On 2022/3/16 20:11, Baoquan He wrote: > > On 02/27/22 at 11:07am, Zhen Lei wrote: ...... > > Hi leizhen, > > > > I made change on reserve_crashkenrel(), inline comment may be slow. > > Please check and consider if they can be taken. > > That's great. Thank you very much. > > > > > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > > index 30ae6638ff54..f96351da1e3e 100644 > > --- a/arch/arm64/mm/init.c > > +++ b/arch/arm64/mm/init.c > > @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) > > * This function reserves memory area given in "crashkernel=" kernel command > > * line parameter. The memory reserved is used by dump capture kernel when > > * primary kernel is crashing. > > + * > > + * NOTE: Reservation of crashkernel,low is special since its existence > > + * is not independent, need rely on the existence of crashkernel,high. > > + * Hence there are different cases for crashkernel,low reservation: Considering to update the 3rd line as below: * NOTE: Reservation of crashkernel,low is special since its existence * is not independent, need rely on the existence of crashkernel,high. * Here, four cases of crashkernel,low reservation are summarized: > > + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; > > + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, > > + * take the default crashkernel,low value; > > + * 3) crashkernel=X is specified, while fallback to get a memory region > > + * in high memory, take the default crashkernel,low value; > > + * 4) crashkernel='invalid value',low is specified, failed the whole > > + * crashkernel reservation and bail out. > > */ > > static void __init reserve_crashkernel(void) > > { > > unsigned long long crash_base, crash_size; > > unsigned long long crash_low_size; > > unsigned long long crash_max = CRASH_ADDR_LOW_MAX; > > - int ret; > > bool fixed_base, high = false; > > char *cmdline = boot_command_line; > > + int ret; > > > > /* crashkernel=X[@offset] */ > > ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), > > &crash_size, &crash_base); > > if (ret || !crash_size) { > > - /* crashkernel=X,high */ > > ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); > > if (ret || !crash_size) > > return; > > > > - /* crashkernel=Y,low */ > > ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); > > if (ret == -ENOENT) > > - /* > > - * crashkernel=Y,low is not specified explicitly, use > > - * default size automatically. > > - */ > > + /* case #2 of crashkernel,low reservation */ > > crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > > else if (ret) > > - /* crashkernel=Y,low is specified but Y is invalid */ > > + /* case #4 of crashkernel,low reservation */ > > return; > > > > - /* Mark crashkernel=X,high is specified */ > > high = true; > > crash_max = CRASH_ADDR_HIGH_MAX; > > } > > @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) > > fixed_base = !!crash_base; > > crash_size = PAGE_ALIGN(crash_size); > > > > - /* User specifies base address explicitly. */ > > if (fixed_base) > > crash_max = crash_base + crash_size; > > > > @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) > > } > > > > if (crash_base >= SZ_4G) { > > - /* > > - * For case crashkernel=X, low memory is not enough and fall > > - * back to reserve specified size of memory above 4G, try to > > - * allocate minimum required memory below 4G again. > > - */ > > + /* case #3 of crashkernel,low reservation */ > > if (!high) > > crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > > > > > >> > >> - /* Current arm64 boot protocol requires 2MB alignment */ > >> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, > >> +retry: > >> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, > >> crash_base, crash_max); > >> if (!crash_base) { > >> + /* > >> + * Attempt to fully allocate low memory failed, fall back > >> + * to high memory, the minimum required low memory will be > >> + * reserved later. > >> + */ > >> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { > >> + crash_max = CRASH_ADDR_HIGH_MAX; > >> + goto retry; > >> + } > >> + > >> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", > >> crash_size); > >> return; > >> } > >> > >> + if (crash_base >= SZ_4G) { > >> + /* > >> + * For case crashkernel=X, low memory is not enough and fall > >> + * back to reserve specified size of memory above 4G, try to > >> + * allocate minimum required memory below 4G again. > >> + */ > >> + if (!high) > >> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > >> + > >> + if (reserve_crashkernel_low(crash_low_size)) { > >> + memblock_phys_free(crash_base, crash_size); > >> + return; > >> + } > >> + } > >> + > >> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", > >> crash_base, crash_base + crash_size, crash_size >> 20); > >> > >> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) > >> * map. Inform kmemleak so that it won't try to access it. > >> */ > >> kmemleak_ignore_phys(crash_base); > >> + if (crashk_low_res.end) > >> + kmemleak_ignore_phys(crashk_low_res.start); > >> + > >> crashk_res.start = crash_base; > >> crashk_res.end = crash_base + crash_size - 1; > >> insert_resource(&iomem_resource, &crashk_res); > >> -- > >> 2.25.1 > >> > > > > . > > > > -- > Regards, > Zhen Lei >
On 02/27/22 at 11:07am, Zhen Lei wrote: > From: Chen Zhou <chenzhou10@huawei.com> > > There are following issues in arm64 kdump: > 1. We use crashkernel=X to reserve crashkernel below 4G, which > will fail when there is no enough low memory. > 2. If reserving crashkernel above 4G, in this case, crash dump > kernel will boot failure because there is no low memory available ~~ change it to "get boot failure" or "fail to boot" > for allocation. > > To solve these issues, change the behavior of crashkernel=X and > introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation > in DMA zone, and fall back to high allocation if it fails. > We can also use "crashkernel=X,high" to select a region above DMA zone, > which also tries to allocate at least 256M in DMA zone automatically. > "crashkernel=Y,low" can be used to allocate specified size low memory. > > Signed-off-by: Chen Zhou <chenzhou10@huawei.com> > Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> > Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
On 2022/3/17 10:36, Baoquan He wrote: > On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote: >> >> >> On 2022/3/16 20:11, Baoquan He wrote: >>> On 02/27/22 at 11:07am, Zhen Lei wrote: > ...... > >>> Hi leizhen, >>> >>> I made change on reserve_crashkenrel(), inline comment may be slow. >>> Please check and consider if they can be taken. >> >> That's great. Thank you very much. >> >>> >>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >>> index 30ae6638ff54..f96351da1e3e 100644 >>> --- a/arch/arm64/mm/init.c >>> +++ b/arch/arm64/mm/init.c >>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) >>> * This function reserves memory area given in "crashkernel=" kernel command >>> * line parameter. The memory reserved is used by dump capture kernel when >>> * primary kernel is crashing. >>> + * >>> + * NOTE: Reservation of crashkernel,low is special since its existence >>> + * is not independent, need rely on the existence of crashkernel,high. >>> + * Hence there are different cases for crashkernel,low reservation: > > Considering to update the 3rd line as below: > > * NOTE: Reservation of crashkernel,low is special since its existence > * is not independent, need rely on the existence of crashkernel,high. > * Here, four cases of crashkernel,low reservation are summarized: OK. How about change "crashkernel,low" to "crashkernel low memory"? "crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" are very similar, may dazzle the reader. > >>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; >>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, >>> + * take the default crashkernel,low value; >>> + * 3) crashkernel=X is specified, while fallback to get a memory region >>> + * in high memory, take the default crashkernel,low value; >>> + * 4) crashkernel='invalid value',low is specified, failed the whole >>> + * crashkernel reservation and bail out. >>> */ >>> static void __init reserve_crashkernel(void) >>> { >>> unsigned long long crash_base, crash_size; >>> unsigned long long crash_low_size; >>> unsigned long long crash_max = CRASH_ADDR_LOW_MAX; >>> - int ret; >>> bool fixed_base, high = false; >>> char *cmdline = boot_command_line; >>> + int ret; >>> >>> /* crashkernel=X[@offset] */ >>> ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), >>> &crash_size, &crash_base); >>> if (ret || !crash_size) { >>> - /* crashkernel=X,high */ >>> ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); >>> if (ret || !crash_size) >>> return; >>> >>> - /* crashkernel=Y,low */ >>> ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); >>> if (ret == -ENOENT) >>> - /* >>> - * crashkernel=Y,low is not specified explicitly, use >>> - * default size automatically. >>> - */ >>> + /* case #2 of crashkernel,low reservation */ >>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>> else if (ret) >>> - /* crashkernel=Y,low is specified but Y is invalid */ >>> + /* case #4 of crashkernel,low reservation */ >>> return; >>> >>> - /* Mark crashkernel=X,high is specified */ >>> high = true; >>> crash_max = CRASH_ADDR_HIGH_MAX; >>> } >>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) >>> fixed_base = !!crash_base; >>> crash_size = PAGE_ALIGN(crash_size); >>> >>> - /* User specifies base address explicitly. */ >>> if (fixed_base) >>> crash_max = crash_base + crash_size; >>> >>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) >>> } >>> >>> if (crash_base >= SZ_4G) { >>> - /* >>> - * For case crashkernel=X, low memory is not enough and fall >>> - * back to reserve specified size of memory above 4G, try to >>> - * allocate minimum required memory below 4G again. >>> - */ >>> + /* case #3 of crashkernel,low reservation */ >>> if (!high) >>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>> >>> >>>> >>>> - /* Current arm64 boot protocol requires 2MB alignment */ >>>> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, >>>> +retry: >>>> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, >>>> crash_base, crash_max); >>>> if (!crash_base) { >>>> + /* >>>> + * Attempt to fully allocate low memory failed, fall back >>>> + * to high memory, the minimum required low memory will be >>>> + * reserved later. >>>> + */ >>>> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { >>>> + crash_max = CRASH_ADDR_HIGH_MAX; >>>> + goto retry; >>>> + } >>>> + >>>> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", >>>> crash_size); >>>> return; >>>> } >>>> >>>> + if (crash_base >= SZ_4G) { >>>> + /* >>>> + * For case crashkernel=X, low memory is not enough and fall >>>> + * back to reserve specified size of memory above 4G, try to >>>> + * allocate minimum required memory below 4G again. >>>> + */ >>>> + if (!high) >>>> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>>> + >>>> + if (reserve_crashkernel_low(crash_low_size)) { >>>> + memblock_phys_free(crash_base, crash_size); >>>> + return; >>>> + } >>>> + } >>>> + >>>> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", >>>> crash_base, crash_base + crash_size, crash_size >> 20); >>>> >>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) >>>> * map. Inform kmemleak so that it won't try to access it. >>>> */ >>>> kmemleak_ignore_phys(crash_base); >>>> + if (crashk_low_res.end) >>>> + kmemleak_ignore_phys(crashk_low_res.start); >>>> + >>>> crashk_res.start = crash_base; >>>> crashk_res.end = crash_base + crash_size - 1; >>>> insert_resource(&iomem_resource, &crashk_res); >>>> -- >>>> 2.25.1 >>>> >>> >>> . >>> >> >> -- >> Regards, >> Zhen Lei >> > > . >
On 2022/3/17 10:38, Baoquan He wrote: > On 02/27/22 at 11:07am, Zhen Lei wrote: >> From: Chen Zhou <chenzhou10@huawei.com> >> >> There are following issues in arm64 kdump: >> 1. We use crashkernel=X to reserve crashkernel below 4G, which >> will fail when there is no enough low memory. >> 2. If reserving crashkernel above 4G, in this case, crash dump >> kernel will boot failure because there is no low memory available > ~~ change it to "get boot failure" or "fail to boot" OK. I'm going to use "fail to boot". >> for allocation. >> >> To solve these issues, change the behavior of crashkernel=X and >> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation >> in DMA zone, and fall back to high allocation if it fails. >> We can also use "crashkernel=X,high" to select a region above DMA zone, >> which also tries to allocate at least 256M in DMA zone automatically. >> "crashkernel=Y,low" can be used to allocate specified size low memory. >> >> Signed-off-by: Chen Zhou <chenzhou10@huawei.com> >> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> > > . >
On 03/17/22 at 11:19am, Leizhen (ThunderTown) wrote: > > > On 2022/3/17 10:36, Baoquan He wrote: > > On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote: > >> > >> > >> On 2022/3/16 20:11, Baoquan He wrote: > >>> On 02/27/22 at 11:07am, Zhen Lei wrote: > > ...... > > > >>> Hi leizhen, > >>> > >>> I made change on reserve_crashkenrel(), inline comment may be slow. > >>> Please check and consider if they can be taken. > >> > >> That's great. Thank you very much. > >> > >>> > >>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > >>> index 30ae6638ff54..f96351da1e3e 100644 > >>> --- a/arch/arm64/mm/init.c > >>> +++ b/arch/arm64/mm/init.c > >>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) > >>> * This function reserves memory area given in "crashkernel=" kernel command > >>> * line parameter. The memory reserved is used by dump capture kernel when > >>> * primary kernel is crashing. > >>> + * > >>> + * NOTE: Reservation of crashkernel,low is special since its existence > >>> + * is not independent, need rely on the existence of crashkernel,high. > >>> + * Hence there are different cases for crashkernel,low reservation: > > > > Considering to update the 3rd line as below: > > > > * NOTE: Reservation of crashkernel,low is special since its existence > > * is not independent, need rely on the existence of crashkernel,high. > > * Here, four cases of crashkernel,low reservation are summarized: > > OK. How about change "crashkernel,low" to "crashkernel low memory"? > "crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" are very similar, > may dazzle the reader. Fine by me. 'crashkernel low memory' is formal, just make sentence a little longer. Please take what you think fitter. > > > > >>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; > >>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, > >>> + * take the default crashkernel,low value; > >>> + * 3) crashkernel=X is specified, while fallback to get a memory region > >>> + * in high memory, take the default crashkernel,low value; > >>> + * 4) crashkernel='invalid value',low is specified, failed the whole > >>> + * crashkernel reservation and bail out. > >>> */ > >>> static void __init reserve_crashkernel(void) > >>> { > >>> unsigned long long crash_base, crash_size; > >>> unsigned long long crash_low_size; > >>> unsigned long long crash_max = CRASH_ADDR_LOW_MAX; > >>> - int ret; > >>> bool fixed_base, high = false; > >>> char *cmdline = boot_command_line; > >>> + int ret; > >>> > >>> /* crashkernel=X[@offset] */ > >>> ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), > >>> &crash_size, &crash_base); > >>> if (ret || !crash_size) { > >>> - /* crashkernel=X,high */ > >>> ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); > >>> if (ret || !crash_size) > >>> return; > >>> > >>> - /* crashkernel=Y,low */ > >>> ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); > >>> if (ret == -ENOENT) > >>> - /* > >>> - * crashkernel=Y,low is not specified explicitly, use > >>> - * default size automatically. > >>> - */ > >>> + /* case #2 of crashkernel,low reservation */ > >>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > >>> else if (ret) > >>> - /* crashkernel=Y,low is specified but Y is invalid */ > >>> + /* case #4 of crashkernel,low reservation */ > >>> return; > >>> > >>> - /* Mark crashkernel=X,high is specified */ > >>> high = true; > >>> crash_max = CRASH_ADDR_HIGH_MAX; > >>> } > >>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) > >>> fixed_base = !!crash_base; > >>> crash_size = PAGE_ALIGN(crash_size); > >>> > >>> - /* User specifies base address explicitly. */ > >>> if (fixed_base) > >>> crash_max = crash_base + crash_size; > >>> > >>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) > >>> } > >>> > >>> if (crash_base >= SZ_4G) { > >>> - /* > >>> - * For case crashkernel=X, low memory is not enough and fall > >>> - * back to reserve specified size of memory above 4G, try to > >>> - * allocate minimum required memory below 4G again. > >>> - */ > >>> + /* case #3 of crashkernel,low reservation */ > >>> if (!high) > >>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > >>> > >>> > >>>> > >>>> - /* Current arm64 boot protocol requires 2MB alignment */ > >>>> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, > >>>> +retry: > >>>> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, > >>>> crash_base, crash_max); > >>>> if (!crash_base) { > >>>> + /* > >>>> + * Attempt to fully allocate low memory failed, fall back > >>>> + * to high memory, the minimum required low memory will be > >>>> + * reserved later. > >>>> + */ > >>>> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { > >>>> + crash_max = CRASH_ADDR_HIGH_MAX; > >>>> + goto retry; > >>>> + } > >>>> + > >>>> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", > >>>> crash_size); > >>>> return; > >>>> } > >>>> > >>>> + if (crash_base >= SZ_4G) { > >>>> + /* > >>>> + * For case crashkernel=X, low memory is not enough and fall > >>>> + * back to reserve specified size of memory above 4G, try to > >>>> + * allocate minimum required memory below 4G again. > >>>> + */ > >>>> + if (!high) > >>>> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > >>>> + > >>>> + if (reserve_crashkernel_low(crash_low_size)) { > >>>> + memblock_phys_free(crash_base, crash_size); > >>>> + return; > >>>> + } > >>>> + } > >>>> + > >>>> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", > >>>> crash_base, crash_base + crash_size, crash_size >> 20); > >>>> > >>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) > >>>> * map. Inform kmemleak so that it won't try to access it. > >>>> */ > >>>> kmemleak_ignore_phys(crash_base); > >>>> + if (crashk_low_res.end) > >>>> + kmemleak_ignore_phys(crashk_low_res.start); > >>>> + > >>>> crashk_res.start = crash_base; > >>>> crashk_res.end = crash_base + crash_size - 1; > >>>> insert_resource(&iomem_resource, &crashk_res); > >>>> -- > >>>> 2.25.1 > >>>> > >>> > >>> . > >>> > >> > >> -- > >> Regards, > >> Zhen Lei > >> > > > > . > > > > -- > Regards, > Zhen Lei >
On 2022/3/17 11:47, Baoquan He wrote: > On 03/17/22 at 11:19am, Leizhen (ThunderTown) wrote: >> >> >> On 2022/3/17 10:36, Baoquan He wrote: >>> On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote: >>>> >>>> >>>> On 2022/3/16 20:11, Baoquan He wrote: >>>>> On 02/27/22 at 11:07am, Zhen Lei wrote: >>> ...... >>> >>>>> Hi leizhen, >>>>> >>>>> I made change on reserve_crashkenrel(), inline comment may be slow. >>>>> Please check and consider if they can be taken. >>>> >>>> That's great. Thank you very much. >>>> >>>>> >>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >>>>> index 30ae6638ff54..f96351da1e3e 100644 >>>>> --- a/arch/arm64/mm/init.c >>>>> +++ b/arch/arm64/mm/init.c >>>>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size) >>>>> * This function reserves memory area given in "crashkernel=" kernel command >>>>> * line parameter. The memory reserved is used by dump capture kernel when >>>>> * primary kernel is crashing. >>>>> + * >>>>> + * NOTE: Reservation of crashkernel,low is special since its existence >>>>> + * is not independent, need rely on the existence of crashkernel,high. >>>>> + * Hence there are different cases for crashkernel,low reservation: >>> >>> Considering to update the 3rd line as below: >>> >>> * NOTE: Reservation of crashkernel,low is special since its existence >>> * is not independent, need rely on the existence of crashkernel,high. >>> * Here, four cases of crashkernel,low reservation are summarized: >> >> OK. How about change "crashkernel,low" to "crashkernel low memory"? >> "crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" are very similar, >> may dazzle the reader. > > Fine by me. 'crashkernel low memory' is formal, just make sentence a > little longer. Please take what you think fitter. OK, I will send v22 after v5.18-rc1. > >> >>> >>>>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y; >>>>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified, >>>>> + * take the default crashkernel,low value; >>>>> + * 3) crashkernel=X is specified, while fallback to get a memory region >>>>> + * in high memory, take the default crashkernel,low value; >>>>> + * 4) crashkernel='invalid value',low is specified, failed the whole >>>>> + * crashkernel reservation and bail out. >>>>> */ >>>>> static void __init reserve_crashkernel(void) >>>>> { >>>>> unsigned long long crash_base, crash_size; >>>>> unsigned long long crash_low_size; >>>>> unsigned long long crash_max = CRASH_ADDR_LOW_MAX; >>>>> - int ret; >>>>> bool fixed_base, high = false; >>>>> char *cmdline = boot_command_line; >>>>> + int ret; >>>>> >>>>> /* crashkernel=X[@offset] */ >>>>> ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), >>>>> &crash_size, &crash_base); >>>>> if (ret || !crash_size) { >>>>> - /* crashkernel=X,high */ >>>>> ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); >>>>> if (ret || !crash_size) >>>>> return; >>>>> >>>>> - /* crashkernel=Y,low */ >>>>> ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); >>>>> if (ret == -ENOENT) >>>>> - /* >>>>> - * crashkernel=Y,low is not specified explicitly, use >>>>> - * default size automatically. >>>>> - */ >>>>> + /* case #2 of crashkernel,low reservation */ >>>>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>>>> else if (ret) >>>>> - /* crashkernel=Y,low is specified but Y is invalid */ >>>>> + /* case #4 of crashkernel,low reservation */ >>>>> return; >>>>> >>>>> - /* Mark crashkernel=X,high is specified */ >>>>> high = true; >>>>> crash_max = CRASH_ADDR_HIGH_MAX; >>>>> } >>>>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void) >>>>> fixed_base = !!crash_base; >>>>> crash_size = PAGE_ALIGN(crash_size); >>>>> >>>>> - /* User specifies base address explicitly. */ >>>>> if (fixed_base) >>>>> crash_max = crash_base + crash_size; >>>>> >>>>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void) >>>>> } >>>>> >>>>> if (crash_base >= SZ_4G) { >>>>> - /* >>>>> - * For case crashkernel=X, low memory is not enough and fall >>>>> - * back to reserve specified size of memory above 4G, try to >>>>> - * allocate minimum required memory below 4G again. >>>>> - */ >>>>> + /* case #3 of crashkernel,low reservation */ >>>>> if (!high) >>>>> crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>>>> >>>>> >>>>>> >>>>>> - /* Current arm64 boot protocol requires 2MB alignment */ >>>>>> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, >>>>>> +retry: >>>>>> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, >>>>>> crash_base, crash_max); >>>>>> if (!crash_base) { >>>>>> + /* >>>>>> + * Attempt to fully allocate low memory failed, fall back >>>>>> + * to high memory, the minimum required low memory will be >>>>>> + * reserved later. >>>>>> + */ >>>>>> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { >>>>>> + crash_max = CRASH_ADDR_HIGH_MAX; >>>>>> + goto retry; >>>>>> + } >>>>>> + >>>>>> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", >>>>>> crash_size); >>>>>> return; >>>>>> } >>>>>> >>>>>> + if (crash_base >= SZ_4G) { >>>>>> + /* >>>>>> + * For case crashkernel=X, low memory is not enough and fall >>>>>> + * back to reserve specified size of memory above 4G, try to >>>>>> + * allocate minimum required memory below 4G again. >>>>>> + */ >>>>>> + if (!high) >>>>>> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >>>>>> + >>>>>> + if (reserve_crashkernel_low(crash_low_size)) { >>>>>> + memblock_phys_free(crash_base, crash_size); >>>>>> + return; >>>>>> + } >>>>>> + } >>>>>> + >>>>>> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", >>>>>> crash_base, crash_base + crash_size, crash_size >> 20); >>>>>> >>>>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) >>>>>> * map. Inform kmemleak so that it won't try to access it. >>>>>> */ >>>>>> kmemleak_ignore_phys(crash_base); >>>>>> + if (crashk_low_res.end) >>>>>> + kmemleak_ignore_phys(crashk_low_res.start); >>>>>> + >>>>>> crashk_res.start = crash_base; >>>>>> crashk_res.end = crash_base + crash_size - 1; >>>>>> insert_resource(&iomem_resource, &crashk_res); >>>>>> -- >>>>>> 2.25.1 >>>>>> >>>>> >>>>> . >>>>> >>>> >>>> -- >>>> Regards, >>>> Zhen Lei >>>> >>> >>> . >>> >> >> -- >> Regards, >> Zhen Lei >> > > . >
On 2/26/22 9:07 PM, Zhen Lei wrote: > From: Chen Zhou <chenzhou10@huawei.com> > > There are following issues in arm64 kdump: > 1. We use crashkernel=X to reserve crashkernel below 4G, which > will fail when there is no enough low memory. " Not enough " > 2. If reserving crashkernel above 4G, in this case, crash dump > kernel will boot failure because there is no low memory available > for allocation. We can't have a "boot failure". If the requested reservation can not be met, the kdump configuration is not setup. > > To solve these issues, change the behavior of crashkernel=X and > introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation > in DMA zone, and fall back to high allocation if it fails. > We can also use "crashkernel=X,high" to select a region above DMA zone, > which also tries to allocate at least 256M in DMA zone automatically. > "crashkernel=Y,low" can be used to allocate specified size low memory. Is there going to be documentation on what values certain Arm platforms are going to use this on ? > > Signed-off-by: Chen Zhou <chenzhou10@huawei.com> > Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> > Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> > --- > arch/arm64/kernel/machine_kexec.c | 9 ++- > arch/arm64/kernel/machine_kexec_file.c | 12 ++- > arch/arm64/mm/init.c | 106 +++++++++++++++++++++++-- > 3 files changed, 115 insertions(+), 12 deletions(-) > > diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c > index e16b248699d5c3c..19c2d487cb08feb 100644 > --- a/arch/arm64/kernel/machine_kexec.c > +++ b/arch/arm64/kernel/machine_kexec.c > @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) > > /* in reserved memory? */ > addr = __pfn_to_phys(pfn); > - if ((addr < crashk_res.start) || (crashk_res.end < addr)) > - return false; > + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { > + if (!crashk_low_res.end) > + return false; > + > + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) > + return false; > + } > > if (!kexec_crash_image) > return true; > diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c > index 59c648d51848886..889951291cc0f9c 100644 > --- a/arch/arm64/kernel/machine_kexec_file.c > +++ b/arch/arm64/kernel/machine_kexec_file.c > @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) > > /* Exclude crashkernel region */ > ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); > + if (ret) > + goto out; > + > + if (crashk_low_res.end) { > + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); > + if (ret) > + goto out; > + } > > - if (!ret) > - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); > + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); > > +out: > kfree(cmem); > return ret; > } > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c > index 90f276d46b93bc6..30ae6638ff54c47 100644 > --- a/arch/arm64/mm/init.c > +++ b/arch/arm64/mm/init.c > @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr); > phys_addr_t arm64_dma_phys_limit __ro_after_init; > > #ifdef CONFIG_KEXEC_CORE > +/* Current arm64 boot protocol requires 2MB alignment */ > +#define CRASH_ALIGN SZ_2M > + > +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit > +#define CRASH_ADDR_HIGH_MAX memblock.current_limit > + > +/* > + * This is an empirical value in x86_64 and taken here directly. Please > + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more > + * details. > + */ > +#define DEFAULT_CRASH_KERNEL_LOW_SIZE \ > + max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20) > + > +static int __init reserve_crashkernel_low(unsigned long long low_size) > +{ > + unsigned long long low_base; > + > + /* passed with crashkernel=0,low ? */ > + if (!low_size) > + return 0; > + > + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); > + if (!low_base) { > + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); > + return -ENOMEM; > + } > + > + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", > + low_base, low_base + low_size, low_size >> 20); > + > + crashk_low_res.start = low_base; > + crashk_low_res.end = low_base + low_size - 1; > + insert_resource(&iomem_resource, &crashk_low_res); > + > + return 0; > +} > + > /* > * reserve_crashkernel() - reserves memory for crash kernel > * > @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; > static void __init reserve_crashkernel(void) > { > unsigned long long crash_base, crash_size; > - unsigned long long crash_max = arm64_dma_phys_limit; > + unsigned long long crash_low_size; > + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; > int ret; > + bool fixed_base, high = false; > + char *cmdline = boot_command_line; > > - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), > + /* crashkernel=X[@offset] */ > + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), > &crash_size, &crash_base); > - /* no crashkernel= or invalid value specified */ > - if (ret || !crash_size) > - return; > + if (ret || !crash_size) { > + /* crashkernel=X,high */ > + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); > + if (ret || !crash_size) > + return; > + > + /* crashkernel=Y,low */ > + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); > + if (ret == -ENOENT) > + /* > + * crashkernel=Y,low is not specified explicitly, use > + * default size automatically. > + */ > + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > + else if (ret) > + /* crashkernel=Y,low is specified but Y is invalid */ > + return; > + > + /* Mark crashkernel=X,high is specified */ > + high = true; > + crash_max = CRASH_ADDR_HIGH_MAX; > + } > > + fixed_base = !!crash_base; > crash_size = PAGE_ALIGN(crash_size); > > /* User specifies base address explicitly. */ > - if (crash_base) > + if (fixed_base) > crash_max = crash_base + crash_size; > > - /* Current arm64 boot protocol requires 2MB alignment */ > - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, > +retry: > + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, > crash_base, crash_max); > if (!crash_base) { > + /* > + * Attempt to fully allocate low memory failed, fall back > + * to high memory, the minimum required low memory will be > + * reserved later. > + */ > + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { > + crash_max = CRASH_ADDR_HIGH_MAX; > + goto retry; > + } > + > pr_warn("cannot allocate crashkernel (size:0x%llx)\n", > crash_size); > return; > } > > + if (crash_base >= SZ_4G) { > + /* > + * For case crashkernel=X, low memory is not enough and fall > + * back to reserve specified size of memory above 4G, try to > + * allocate minimum required memory below 4G again. > + */ > + if (!high) > + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; > + > + if (reserve_crashkernel_low(crash_low_size)) { > + memblock_phys_free(crash_base, crash_size); > + return; > + } > + } > + > pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", > crash_base, crash_base + crash_size, crash_size >> 20); > > @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) > * map. Inform kmemleak so that it won't try to access it. > */ > kmemleak_ignore_phys(crash_base); > + if (crashk_low_res.end) > + kmemleak_ignore_phys(crashk_low_res.start); > + > crashk_res.start = crash_base; > crashk_res.end = crash_base + crash_size - 1; > insert_resource(&iomem_resource, &crashk_res);
On 3/21/22 8:29AM, John Donnelly wrote: > On 2/26/22 9:07 PM, Zhen Lei wrote: >> From: Chen Zhou <chenzhou10@huawei.com> >> >> There are following issues in arm64 kdump: >> 1. We use crashkernel=X to reserve crashkernel below 4G, which >> will fail when there is no enough low memory. > > " Not enough " >> 2. If reserving crashkernel above 4G, in this case, crash dump >> kernel will boot failure because there is no low memory available >> for allocation. > > We can't have a "boot failure". If the requested reservation > can not be met, the kdump configuration is not setup. I think you misread this. Without these patches, if only high memory is reserved for the crash kernel, then the crash kernel will fail to boot. >> >> To solve these issues, change the behavior of crashkernel=X and >> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation >> in DMA zone, and fall back to high allocation if it fails. >> We can also use "crashkernel=X,high" to select a region above DMA zone, >> which also tries to allocate at least 256M in DMA zone automatically. >> "crashkernel=Y,low" can be used to allocate specified size low memory. > > Is there going to be documentation on what values certain Arm platforms > are going to use this on ? > >> >> Signed-off-by: Chen Zhou <chenzhou10@huawei.com> >> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
On 2022/3/21 21:29, John Donnelly wrote: > On 2/26/22 9:07 PM, Zhen Lei wrote: >> From: Chen Zhou <chenzhou10@huawei.com> >> >> There are following issues in arm64 kdump: >> 1. We use crashkernel=X to reserve crashkernel below 4G, which >> will fail when there is no enough low memory. > > " Not enough " OK, thanks >> 2. If reserving crashkernel above 4G, in this case, crash dump >> kernel will boot failure because there is no low memory available >> for allocation. > > We can't have a "boot failure". If the requested reservation > can not be met, the kdump configuration is not setup. >> >> To solve these issues, change the behavior of crashkernel=X and >> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation >> in DMA zone, and fall back to high allocation if it fails. >> We can also use "crashkernel=X,high" to select a region above DMA zone, >> which also tries to allocate at least 256M in DMA zone automatically. >> "crashkernel=Y,low" can be used to allocate specified size low memory. > > Is there going to be documentation on what values certain Arm platforms are going to use this on ? There is no exact formula. > >> >> Signed-off-by: Chen Zhou <chenzhou10@huawei.com> >> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com> >> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> >> --- >> arch/arm64/kernel/machine_kexec.c | 9 ++- >> arch/arm64/kernel/machine_kexec_file.c | 12 ++- >> arch/arm64/mm/init.c | 106 +++++++++++++++++++++++-- >> 3 files changed, 115 insertions(+), 12 deletions(-) >> >> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c >> index e16b248699d5c3c..19c2d487cb08feb 100644 >> --- a/arch/arm64/kernel/machine_kexec.c >> +++ b/arch/arm64/kernel/machine_kexec.c >> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) >> /* in reserved memory? */ >> addr = __pfn_to_phys(pfn); >> - if ((addr < crashk_res.start) || (crashk_res.end < addr)) >> - return false; >> + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { >> + if (!crashk_low_res.end) >> + return false; >> + >> + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) >> + return false; >> + } >> if (!kexec_crash_image) >> return true; >> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c >> index 59c648d51848886..889951291cc0f9c 100644 >> --- a/arch/arm64/kernel/machine_kexec_file.c >> +++ b/arch/arm64/kernel/machine_kexec_file.c >> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) >> /* Exclude crashkernel region */ >> ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); >> + if (ret) >> + goto out; >> + >> + if (crashk_low_res.end) { >> + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); >> + if (ret) >> + goto out; >> + } >> - if (!ret) >> - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); >> + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); >> +out: >> kfree(cmem); >> return ret; >> } >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c >> index 90f276d46b93bc6..30ae6638ff54c47 100644 >> --- a/arch/arm64/mm/init.c >> +++ b/arch/arm64/mm/init.c >> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr); >> phys_addr_t arm64_dma_phys_limit __ro_after_init; >> #ifdef CONFIG_KEXEC_CORE >> +/* Current arm64 boot protocol requires 2MB alignment */ >> +#define CRASH_ALIGN SZ_2M >> + >> +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit >> +#define CRASH_ADDR_HIGH_MAX memblock.current_limit >> + >> +/* >> + * This is an empirical value in x86_64 and taken here directly. Please >> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more >> + * details. >> + */ >> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE \ >> + max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20) >> + >> +static int __init reserve_crashkernel_low(unsigned long long low_size) >> +{ >> + unsigned long long low_base; >> + >> + /* passed with crashkernel=0,low ? */ >> + if (!low_size) >> + return 0; >> + >> + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); >> + if (!low_base) { >> + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); >> + return -ENOMEM; >> + } >> + >> + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", >> + low_base, low_base + low_size, low_size >> 20); >> + >> + crashk_low_res.start = low_base; >> + crashk_low_res.end = low_base + low_size - 1; >> + insert_resource(&iomem_resource, &crashk_low_res); >> + >> + return 0; >> +} >> + >> /* >> * reserve_crashkernel() - reserves memory for crash kernel >> * >> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; >> static void __init reserve_crashkernel(void) >> { >> unsigned long long crash_base, crash_size; >> - unsigned long long crash_max = arm64_dma_phys_limit; >> + unsigned long long crash_low_size; >> + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; >> int ret; >> + bool fixed_base, high = false; >> + char *cmdline = boot_command_line; >> - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), >> + /* crashkernel=X[@offset] */ >> + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), >> &crash_size, &crash_base); >> - /* no crashkernel= or invalid value specified */ >> - if (ret || !crash_size) >> - return; >> + if (ret || !crash_size) { >> + /* crashkernel=X,high */ >> + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); >> + if (ret || !crash_size) >> + return; >> + >> + /* crashkernel=Y,low */ >> + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); >> + if (ret == -ENOENT) >> + /* >> + * crashkernel=Y,low is not specified explicitly, use >> + * default size automatically. >> + */ >> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >> + else if (ret) >> + /* crashkernel=Y,low is specified but Y is invalid */ >> + return; >> + >> + /* Mark crashkernel=X,high is specified */ >> + high = true; >> + crash_max = CRASH_ADDR_HIGH_MAX; >> + } >> + fixed_base = !!crash_base; >> crash_size = PAGE_ALIGN(crash_size); >> /* User specifies base address explicitly. */ >> - if (crash_base) >> + if (fixed_base) >> crash_max = crash_base + crash_size; >> - /* Current arm64 boot protocol requires 2MB alignment */ >> - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, >> +retry: >> + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, >> crash_base, crash_max); >> if (!crash_base) { >> + /* >> + * Attempt to fully allocate low memory failed, fall back >> + * to high memory, the minimum required low memory will be >> + * reserved later. >> + */ >> + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { >> + crash_max = CRASH_ADDR_HIGH_MAX; >> + goto retry; >> + } >> + >> pr_warn("cannot allocate crashkernel (size:0x%llx)\n", >> crash_size); >> return; >> } >> + if (crash_base >= SZ_4G) { >> + /* >> + * For case crashkernel=X, low memory is not enough and fall >> + * back to reserve specified size of memory above 4G, try to >> + * allocate minimum required memory below 4G again. >> + */ >> + if (!high) >> + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; >> + >> + if (reserve_crashkernel_low(crash_low_size)) { >> + memblock_phys_free(crash_base, crash_size); >> + return; >> + } >> + } >> + >> pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", >> crash_base, crash_base + crash_size, crash_size >> 20); >> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) >> * map. Inform kmemleak so that it won't try to access it. >> */ >> kmemleak_ignore_phys(crash_base); >> + if (crashk_low_res.end) >> + kmemleak_ignore_phys(crashk_low_res.start); >> + >> crashk_res.start = crash_base; >> crashk_res.end = crash_base + crash_size - 1; >> insert_resource(&iomem_resource, &crashk_res); > > . >
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index e16b248699d5c3c..19c2d487cb08feb 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn) /* in reserved memory? */ addr = __pfn_to_phys(pfn); - if ((addr < crashk_res.start) || (crashk_res.end < addr)) - return false; + if ((addr < crashk_res.start) || (crashk_res.end < addr)) { + if (!crashk_low_res.end) + return false; + + if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) + return false; + } if (!kexec_crash_image) return true; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 59c648d51848886..889951291cc0f9c 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) /* Exclude crashkernel region */ ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (ret) + goto out; + + if (crashk_low_res.end) { + ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end); + if (ret) + goto out; + } - if (!ret) - ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); +out: kfree(cmem); return ret; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 90f276d46b93bc6..30ae6638ff54c47 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr); phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE +/* Current arm64 boot protocol requires 2MB alignment */ +#define CRASH_ALIGN SZ_2M + +#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit +#define CRASH_ADDR_HIGH_MAX memblock.current_limit + +/* + * This is an empirical value in x86_64 and taken here directly. Please + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more + * details. + */ +#define DEFAULT_CRASH_KERNEL_LOW_SIZE \ + max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20) + +static int __init reserve_crashkernel_low(unsigned long long low_size) +{ + unsigned long long low_base; + + /* passed with crashkernel=0,low ? */ + if (!low_size) + return 0; + + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); + if (!low_base) { + pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); + return -ENOMEM; + } + + pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", + low_base, low_base + low_size, low_size >> 20); + + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); + + return 0; +} + /* * reserve_crashkernel() - reserves memory for crash kernel * @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init; static void __init reserve_crashkernel(void) { unsigned long long crash_base, crash_size; - unsigned long long crash_max = arm64_dma_phys_limit; + unsigned long long crash_low_size; + unsigned long long crash_max = CRASH_ADDR_LOW_MAX; int ret; + bool fixed_base, high = false; + char *cmdline = boot_command_line; - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + /* crashkernel=X[@offset] */ + ret = parse_crashkernel(cmdline, memblock_phys_mem_size(), &crash_size, &crash_base); - /* no crashkernel= or invalid value specified */ - if (ret || !crash_size) - return; + if (ret || !crash_size) { + /* crashkernel=X,high */ + ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base); + if (ret || !crash_size) + return; + + /* crashkernel=Y,low */ + ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base); + if (ret == -ENOENT) + /* + * crashkernel=Y,low is not specified explicitly, use + * default size automatically. + */ + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + else if (ret) + /* crashkernel=Y,low is specified but Y is invalid */ + return; + + /* Mark crashkernel=X,high is specified */ + high = true; + crash_max = CRASH_ADDR_HIGH_MAX; + } + fixed_base = !!crash_base; crash_size = PAGE_ALIGN(crash_size); /* User specifies base address explicitly. */ - if (crash_base) + if (fixed_base) crash_max = crash_base + crash_size; - /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_phys_alloc_range(crash_size, SZ_2M, +retry: + crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_max); if (!crash_base) { + /* + * Attempt to fully allocate low memory failed, fall back + * to high memory, the minimum required low memory will be + * reserved later. + */ + if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) { + crash_max = CRASH_ADDR_HIGH_MAX; + goto retry; + } + pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); return; } + if (crash_base >= SZ_4G) { + /* + * For case crashkernel=X, low memory is not enough and fall + * back to reserve specified size of memory above 4G, try to + * allocate minimum required memory below 4G again. + */ + if (!high) + crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; + + if (reserve_crashkernel_low(crash_low_size)) { + memblock_phys_free(crash_base, crash_size); + return; + } + } + pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", crash_base, crash_base + crash_size, crash_size >> 20); @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void) * map. Inform kmemleak so that it won't try to access it. */ kmemleak_ignore_phys(crash_base); + if (crashk_low_res.end) + kmemleak_ignore_phys(crashk_low_res.start); + crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res);