diff mbox series

[v13,6/8] arm64: kdump: reimplement crashkernel=X

Message ID 20201031074437.168008-7-chenzhou10@huawei.com (mailing list archive)
State New, archived
Headers show
Series support reserving crashkernel above 4G on arm64 kdump | expand

Commit Message

chenzhou Oct. 31, 2020, 7:44 a.m. UTC
There are following issues in arm64 kdump:
1. We use crashkernel=X to reserve crashkernel below 4G, which
will fail when there is no enough low memory.
2. If reserving crashkernel above 4G, in this case, crash dump
kernel will boot failure because there is no low memory available
for allocation.
3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"),
if the memory reserved for crash dump kernel falled in ZONE_DMA32,
the devices in crash dump kernel need to use ZONE_DMA will alloc
fail.

To solve these issues, change the behavior of crashkernel=X and
introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
in DMA zone or DMA32 zone if CONFIG_ZONE_DMA is disabled, and fall back
to high allocation if it fails.
We can also use "crashkernel=X,high" to select a region above DMA zone,
which also tries to allocate at least 256M in DMA zone automatically
(or the DMA32 zone if CONFIG_ZONE_DMA is disabled).
"crashkernel=Y,low" can be used to allocate specified size low memory.

Another minor change, there may be two regions reserved for crash
dump kernel, in order to distinct from the high region and make no
effect to the use of existing kexec-tools, rename the low region as
"Crash kernel (low)".

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Tested-by: John Donnelly <John.p.donnelly@oracle.com>
---
 arch/arm64/include/asm/kexec.h |  9 +++++
 arch/arm64/kernel/setup.c      | 13 +++++++-
 arch/arm64/mm/init.c           | 60 ++--------------------------------
 arch/arm64/mm/mmu.c            |  4 +++
 kernel/crash_core.c            |  8 +++--
 5 files changed, 34 insertions(+), 60 deletions(-)

Comments

Baoquan He Nov. 11, 2020, 1:59 a.m. UTC | #1
On 10/31/20 at 03:44pm, Chen Zhou wrote:
> There are following issues in arm64 kdump:
> 1. We use crashkernel=X to reserve crashkernel below 4G, which
> will fail when there is no enough low memory.
> 2. If reserving crashkernel above 4G, in this case, crash dump
> kernel will boot failure because there is no low memory available
> for allocation.
> 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"),
> if the memory reserved for crash dump kernel falled in ZONE_DMA32,
> the devices in crash dump kernel need to use ZONE_DMA will alloc
> fail.
> 
> To solve these issues, change the behavior of crashkernel=X and
> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
> in DMA zone or DMA32 zone if CONFIG_ZONE_DMA is disabled, and fall back
> to high allocation if it fails.
> We can also use "crashkernel=X,high" to select a region above DMA zone,
> which also tries to allocate at least 256M in DMA zone automatically
> (or the DMA32 zone if CONFIG_ZONE_DMA is disabled).
> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Another minor change, there may be two regions reserved for crash
> dump kernel, in order to distinct from the high region and make no
> effect to the use of existing kexec-tools, rename the low region as
> "Crash kernel (low)".
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> Tested-by: John Donnelly <John.p.donnelly@oracle.com>
> ---
>  arch/arm64/include/asm/kexec.h |  9 +++++
>  arch/arm64/kernel/setup.c      | 13 +++++++-
>  arch/arm64/mm/init.c           | 60 ++--------------------------------
>  arch/arm64/mm/mmu.c            |  4 +++
>  kernel/crash_core.c            |  8 +++--
>  5 files changed, 34 insertions(+), 60 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> index 402d208265a3..79909ae5e22e 100644
> --- a/arch/arm64/include/asm/kexec.h
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -28,7 +28,12 @@
>  /* 2M alignment for crash kernel regions */
>  #define CRASH_ALIGN	SZ_2M
>  
> +#ifdef CONFIG_ZONE_DMA
> +#define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
> +#else
>  #define CRASH_ADDR_LOW_MAX	arm64_dma32_phys_limit
> +#endif
> +
>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>  
>  #ifndef __ASSEMBLY__
> @@ -96,6 +101,10 @@ static inline void crash_prepare_suspend(void) {}
>  static inline void crash_post_resume(void) {}
>  #endif
>  
> +#ifdef CONFIG_KEXEC_CORE
> +extern void __init reserve_crashkernel(void);
> +#endif
> +
>  #ifdef CONFIG_KEXEC_FILE
>  #define ARCH_HAS_KIMAGE_ARCH
>  
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 133257ffd859..6aff30de8f47 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -238,7 +238,18 @@ static void __init request_standard_resources(void)
>  		    kernel_data.end <= res->end)
>  			request_resource(res, &kernel_data);
>  #ifdef CONFIG_KEXEC_CORE
> -		/* Userspace will find "Crash kernel" region in /proc/iomem. */
> +		/*
> +		 * Userspace will find "Crash kernel" or "Crash kernel (low)"
> +		 * region in /proc/iomem.
> +		 * In order to distinct from the high region and make no effect
> +		 * to the use of existing kexec-tools, rename the low region as
> +		 * "Crash kernel (low)".
> +		 */
> +		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
> +				crashk_low_res.end <= res->end) {
> +			crashk_low_res.name = "Crash kernel (low)";
> +			request_resource(res, &crashk_low_res);
> +		}
>  		if (crashk_res.end && crashk_res.start >= res->start &&
>  		    crashk_res.end <= res->end)
>  			request_resource(res, &crashk_res);
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index a07fd8e1f926..888c4f7eadc3 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -34,6 +34,7 @@
>  #include <asm/fixmap.h>
>  #include <asm/kasan.h>
>  #include <asm/kernel-pgtable.h>
> +#include <asm/kexec.h>
>  #include <asm/memory.h>
>  #include <asm/numa.h>
>  #include <asm/sections.h>
> @@ -62,66 +63,11 @@ EXPORT_SYMBOL(memstart_addr);
>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  phys_addr_t arm64_dma32_phys_limit __ro_after_init;
>  
> -#ifdef CONFIG_KEXEC_CORE
> -/*
> - * reserve_crashkernel() - reserves memory for crash kernel
> - *
> - * This function reserves memory area given in "crashkernel=" kernel command
> - * line parameter. The memory reserved is used by dump capture kernel when
> - * primary kernel is crashing.
> - */
> -static void __init reserve_crashkernel(void)
> -{
> -	unsigned long long crash_base, crash_size;
> -	int ret;
> -
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> -				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> -
> -	crash_size = PAGE_ALIGN(crash_size);
> -
> -	if (crash_base == 0) {
> -		/* Current arm64 boot protocol requires 2MB alignment */
> -		crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX,
> -				crash_size, CRASH_ALIGN);
> -		if (crash_base == 0) {
> -			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> -				crash_size);
> -			return;
> -		}
> -	} else {
> -		/* User specifies base address explicitly. */
> -		if (!memblock_is_region_memory(crash_base, crash_size)) {
> -			pr_warn("cannot reserve crashkernel: region is not memory\n");
> -			return;
> -		}
> -
> -		if (memblock_is_region_reserved(crash_base, crash_size)) {
> -			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
> -			return;
> -		}
> -
> -		if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) {
> -			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
> -			return;
> -		}
> -	}
> -	memblock_reserve(crash_base, crash_size);
> -
> -	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> -		crash_base, crash_base + crash_size, crash_size >> 20);
> -
> -	crashk_res.start = crash_base;
> -	crashk_res.end = crash_base + crash_size - 1;
> -}
> -#else
> +#ifndef CONFIG_KEXEC_CORE
>  static void __init reserve_crashkernel(void)
>  {
>  }
> -#endif /* CONFIG_KEXEC_CORE */
> +#endif
>  
>  #ifdef CONFIG_CRASH_DUMP
>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 1c0f3e02f731..c55cee290bbb 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
>  	 */
>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
>  #ifdef CONFIG_KEXEC_CORE
> +	if (crashk_low_res.end)
> +		memblock_mark_nomap(crashk_low_res.start,
> +				    resource_size(&crashk_low_res));
> +
>  	if (crashk_res.end)
>  		memblock_mark_nomap(crashk_res.start,
>  				    resource_size(&crashk_res));
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index d39892bdb9ae..cdef7d8c91a6 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
>  
>  int __init reserve_crashkernel_low(void)
>  {
> -#ifdef CONFIG_X86_64
> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)

Not very sure if a CONFIG_64BIT checking is better.

>  	unsigned long long base, low_base = 0, low_size = 0;
>  	unsigned long low_mem_limit;
>  	int ret;
> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
>  
>  	crashk_low_res.start = low_base;
>  	crashk_low_res.end   = low_base + low_size - 1;
> +#ifdef CONFIG_X86_64
>  	insert_resource(&iomem_resource, &crashk_low_res);
> +#endif
>  #endif
>  	return 0;
>  }
>  
> -#ifdef CONFIG_X86
> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)

Should we make this weak default so that we can remove the ARCH config?

>  #ifdef CONFIG_KEXEC_CORE
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
> @@ -453,7 +455,9 @@ void __init reserve_crashkernel(void)
>  
>  	crashk_res.start = crash_base;
>  	crashk_res.end   = crash_base + crash_size - 1;
> +#ifdef CONFIG_X86
>  	insert_resource(&iomem_resource, &crashk_res);
> +#endif
>  }
>  #endif /* CONFIG_KEXEC_CORE */
>  #endif
> -- 
> 2.20.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
>
chenzhou Nov. 11, 2020, 1:27 p.m. UTC | #2
Hi Baoquan,


On 2020/11/11 9:59, Baoquan He wrote:
> On 10/31/20 at 03:44pm, Chen Zhou wrote:
>> There are following issues in arm64 kdump:
>> 1. We use crashkernel=X to reserve crashkernel below 4G, which
>> will fail when there is no enough low memory.
>> 2. If reserving crashkernel above 4G, in this case, crash dump
>> kernel will boot failure because there is no low memory available
>> for allocation.
>> 3. Since commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32"),
>> if the memory reserved for crash dump kernel falled in ZONE_DMA32,
>> the devices in crash dump kernel need to use ZONE_DMA will alloc
>> fail.
>>
>> To solve these issues, change the behavior of crashkernel=X and
>> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
>> in DMA zone or DMA32 zone if CONFIG_ZONE_DMA is disabled, and fall back
>> to high allocation if it fails.
>> We can also use "crashkernel=X,high" to select a region above DMA zone,
>> which also tries to allocate at least 256M in DMA zone automatically
>> (or the DMA32 zone if CONFIG_ZONE_DMA is disabled).
>> "crashkernel=Y,low" can be used to allocate specified size low memory.
>>
>> Another minor change, there may be two regions reserved for crash
>> dump kernel, in order to distinct from the high region and make no
>> effect to the use of existing kexec-tools, rename the low region as
>> "Crash kernel (low)".
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> Tested-by: John Donnelly <John.p.donnelly@oracle.com>
>> ---
>>  arch/arm64/include/asm/kexec.h |  9 +++++
>>  arch/arm64/kernel/setup.c      | 13 +++++++-
>>  arch/arm64/mm/init.c           | 60 ++--------------------------------
>>  arch/arm64/mm/mmu.c            |  4 +++
>>  kernel/crash_core.c            |  8 +++--
>>  5 files changed, 34 insertions(+), 60 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
>> index 402d208265a3..79909ae5e22e 100644
>> --- a/arch/arm64/include/asm/kexec.h
>> +++ b/arch/arm64/include/asm/kexec.h
>> @@ -28,7 +28,12 @@
>>  /* 2M alignment for crash kernel regions */
>>  #define CRASH_ALIGN	SZ_2M
>>  
>> +#ifdef CONFIG_ZONE_DMA
>> +#define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>> +#else
>>  #define CRASH_ADDR_LOW_MAX	arm64_dma32_phys_limit
>> +#endif
>> +
>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>  
>>  #ifndef __ASSEMBLY__
>> @@ -96,6 +101,10 @@ static inline void crash_prepare_suspend(void) {}
>>  static inline void crash_post_resume(void) {}
>>  #endif
>>  
>> +#ifdef CONFIG_KEXEC_CORE
>> +extern void __init reserve_crashkernel(void);
>> +#endif
>> +
>>  #ifdef CONFIG_KEXEC_FILE
>>  #define ARCH_HAS_KIMAGE_ARCH
>>  
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index 133257ffd859..6aff30de8f47 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -238,7 +238,18 @@ static void __init request_standard_resources(void)
>>  		    kernel_data.end <= res->end)
>>  			request_resource(res, &kernel_data);
>>  #ifdef CONFIG_KEXEC_CORE
>> -		/* Userspace will find "Crash kernel" region in /proc/iomem. */
>> +		/*
>> +		 * Userspace will find "Crash kernel" or "Crash kernel (low)"
>> +		 * region in /proc/iomem.
>> +		 * In order to distinct from the high region and make no effect
>> +		 * to the use of existing kexec-tools, rename the low region as
>> +		 * "Crash kernel (low)".
>> +		 */
>> +		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
>> +				crashk_low_res.end <= res->end) {
>> +			crashk_low_res.name = "Crash kernel (low)";
>> +			request_resource(res, &crashk_low_res);
>> +		}
>>  		if (crashk_res.end && crashk_res.start >= res->start &&
>>  		    crashk_res.end <= res->end)
>>  			request_resource(res, &crashk_res);
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index a07fd8e1f926..888c4f7eadc3 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -34,6 +34,7 @@
>>  #include <asm/fixmap.h>
>>  #include <asm/kasan.h>
>>  #include <asm/kernel-pgtable.h>
>> +#include <asm/kexec.h>
>>  #include <asm/memory.h>
>>  #include <asm/numa.h>
>>  #include <asm/sections.h>
>> @@ -62,66 +63,11 @@ EXPORT_SYMBOL(memstart_addr);
>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  phys_addr_t arm64_dma32_phys_limit __ro_after_init;
>>  
>> -#ifdef CONFIG_KEXEC_CORE
>> -/*
>> - * reserve_crashkernel() - reserves memory for crash kernel
>> - *
>> - * This function reserves memory area given in "crashkernel=" kernel command
>> - * line parameter. The memory reserved is used by dump capture kernel when
>> - * primary kernel is crashing.
>> - */
>> -static void __init reserve_crashkernel(void)
>> -{
>> -	unsigned long long crash_base, crash_size;
>> -	int ret;
>> -
>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>> -				&crash_size, &crash_base);
>> -	/* no crashkernel= or invalid value specified */
>> -	if (ret || !crash_size)
>> -		return;
>> -
>> -	crash_size = PAGE_ALIGN(crash_size);
>> -
>> -	if (crash_base == 0) {
>> -		/* Current arm64 boot protocol requires 2MB alignment */
>> -		crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX,
>> -				crash_size, CRASH_ALIGN);
>> -		if (crash_base == 0) {
>> -			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>> -				crash_size);
>> -			return;
>> -		}
>> -	} else {
>> -		/* User specifies base address explicitly. */
>> -		if (!memblock_is_region_memory(crash_base, crash_size)) {
>> -			pr_warn("cannot reserve crashkernel: region is not memory\n");
>> -			return;
>> -		}
>> -
>> -		if (memblock_is_region_reserved(crash_base, crash_size)) {
>> -			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
>> -			return;
>> -		}
>> -
>> -		if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) {
>> -			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
>> -			return;
>> -		}
>> -	}
>> -	memblock_reserve(crash_base, crash_size);
>> -
>> -	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>> -		crash_base, crash_base + crash_size, crash_size >> 20);
>> -
>> -	crashk_res.start = crash_base;
>> -	crashk_res.end = crash_base + crash_size - 1;
>> -}
>> -#else
>> +#ifndef CONFIG_KEXEC_CORE
>>  static void __init reserve_crashkernel(void)
>>  {
>>  }
>> -#endif /* CONFIG_KEXEC_CORE */
>> +#endif
>>  
>>  #ifdef CONFIG_CRASH_DUMP
>>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 1c0f3e02f731..c55cee290bbb 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
>>  	 */
>>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
>>  #ifdef CONFIG_KEXEC_CORE
>> +	if (crashk_low_res.end)
>> +		memblock_mark_nomap(crashk_low_res.start,
>> +				    resource_size(&crashk_low_res));
>> +
>>  	if (crashk_res.end)
>>  		memblock_mark_nomap(crashk_res.start,
>>  				    resource_size(&crashk_res));
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index d39892bdb9ae..cdef7d8c91a6 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
>>  
>>  int __init reserve_crashkernel_low(void)
>>  {
>> -#ifdef CONFIG_X86_64
>> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> Not very sure if a CONFIG_64BIT checking is better.
If doing like this, there may be some compiling errors for other 64-bit kernel, such as mips.
>
>>  	unsigned long long base, low_base = 0, low_size = 0;
>>  	unsigned long low_mem_limit;
>>  	int ret;
>> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
>>  
>>  	crashk_low_res.start = low_base;
>>  	crashk_low_res.end   = low_base + low_size - 1;
>> +#ifdef CONFIG_X86_64
>>  	insert_resource(&iomem_resource, &crashk_low_res);
>> +#endif
>>  #endif
>>  	return 0;
>>  }
>>  
>> -#ifdef CONFIG_X86
>> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
> Should we make this weak default so that we can remove the ARCH config?
The same as above, some arch may not support kdump, in that case,  compiling errors occur.

Thanks,
Chen Zhou
>
>>  #ifdef CONFIG_KEXEC_CORE
>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
>> @@ -453,7 +455,9 @@ void __init reserve_crashkernel(void)
>>  
>>  	crashk_res.start = crash_base;
>>  	crashk_res.end   = crash_base + crash_size - 1;
>> +#ifdef CONFIG_X86
>>  	insert_resource(&iomem_resource, &crashk_res);
>> +#endif
>>  }
>>  #endif /* CONFIG_KEXEC_CORE */
>>  #endif
>> -- 
>> 2.20.1
>>
>>
>> _______________________________________________
>> kexec mailing list
>> kexec@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec
>>
> .
>
Baoquan He Nov. 11, 2020, 1:54 p.m. UTC | #3
On 11/11/20 at 09:27pm, chenzhou wrote:
> Hi Baoquan,
...
> >>  #ifdef CONFIG_CRASH_DUMP
> >>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
> >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> >> index 1c0f3e02f731..c55cee290bbb 100644
> >> --- a/arch/arm64/mm/mmu.c
> >> +++ b/arch/arm64/mm/mmu.c
> >> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
> >>  	 */
> >>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
> >>  #ifdef CONFIG_KEXEC_CORE
> >> +	if (crashk_low_res.end)
> >> +		memblock_mark_nomap(crashk_low_res.start,
> >> +				    resource_size(&crashk_low_res));
> >> +
> >>  	if (crashk_res.end)
> >>  		memblock_mark_nomap(crashk_res.start,
> >>  				    resource_size(&crashk_res));
> >> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> >> index d39892bdb9ae..cdef7d8c91a6 100644
> >> --- a/kernel/crash_core.c
> >> +++ b/kernel/crash_core.c
> >> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
> >>  
> >>  int __init reserve_crashkernel_low(void)
> >>  {
> >> -#ifdef CONFIG_X86_64
> >> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> > Not very sure if a CONFIG_64BIT checking is better.
> If doing like this, there may be some compiling errors for other 64-bit kernel, such as mips.
> >
> >>  	unsigned long long base, low_base = 0, low_size = 0;
> >>  	unsigned long low_mem_limit;
> >>  	int ret;
> >> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
> >>  
> >>  	crashk_low_res.start = low_base;
> >>  	crashk_low_res.end   = low_base + low_size - 1;
> >> +#ifdef CONFIG_X86_64
> >>  	insert_resource(&iomem_resource, &crashk_low_res);
> >> +#endif
> >>  #endif
> >>  	return 0;
> >>  }
> >>  
> >> -#ifdef CONFIG_X86
> >> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
> > Should we make this weak default so that we can remove the ARCH config?
> The same as above, some arch may not support kdump, in that case,  compiling errors occur.

OK, not sure if other people have better idea, oterwise, we can leave with it. 
Thanks for telling.
Mike Rapoport Nov. 12, 2020, 8:25 a.m. UTC | #4
On Wed, Nov 11, 2020 at 09:54:48PM +0800, Baoquan He wrote:
> On 11/11/20 at 09:27pm, chenzhou wrote:
> > Hi Baoquan,
> ...
> > >>  #ifdef CONFIG_CRASH_DUMP
> > >>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
> > >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > >> index 1c0f3e02f731..c55cee290bbb 100644
> > >> --- a/arch/arm64/mm/mmu.c
> > >> +++ b/arch/arm64/mm/mmu.c
> > >> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
> > >>  	 */
> > >>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
> > >>  #ifdef CONFIG_KEXEC_CORE
> > >> +	if (crashk_low_res.end)
> > >> +		memblock_mark_nomap(crashk_low_res.start,
> > >> +				    resource_size(&crashk_low_res));
> > >> +
> > >>  	if (crashk_res.end)
> > >>  		memblock_mark_nomap(crashk_res.start,
> > >>  				    resource_size(&crashk_res));
> > >> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> > >> index d39892bdb9ae..cdef7d8c91a6 100644
> > >> --- a/kernel/crash_core.c
> > >> +++ b/kernel/crash_core.c
> > >> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
> > >>  
> > >>  int __init reserve_crashkernel_low(void)
> > >>  {
> > >> -#ifdef CONFIG_X86_64
> > >> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> > > Not very sure if a CONFIG_64BIT checking is better.
> > If doing like this, there may be some compiling errors for other 64-bit kernel, such as mips.
> > >
> > >>  	unsigned long long base, low_base = 0, low_size = 0;
> > >>  	unsigned long low_mem_limit;
> > >>  	int ret;
> > >> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
> > >>  
> > >>  	crashk_low_res.start = low_base;
> > >>  	crashk_low_res.end   = low_base + low_size - 1;
> > >> +#ifdef CONFIG_X86_64
> > >>  	insert_resource(&iomem_resource, &crashk_low_res);
> > >> +#endif
> > >>  #endif
> > >>  	return 0;
> > >>  }
> > >>  
> > >> -#ifdef CONFIG_X86
> > >> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
> > > Should we make this weak default so that we can remove the ARCH config?
> > The same as above, some arch may not support kdump, in that case,  compiling errors occur.
> 
> OK, not sure if other people have better idea, oterwise, we can leave with it. 
> Thanks for telling.

I think it would be better to have CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL
in arch/Kconfig and select this by X86 and ARM64.

Since reserve_crashkernel() implementations are quite similart on other
architectures as well, we can have more users of this later.
Baoquan He Nov. 12, 2020, 8:36 a.m. UTC | #5
On 11/12/20 at 10:25am, Mike Rapoport wrote:
> On Wed, Nov 11, 2020 at 09:54:48PM +0800, Baoquan He wrote:
> > On 11/11/20 at 09:27pm, chenzhou wrote:
> > > Hi Baoquan,
> > ...
> > > >>  #ifdef CONFIG_CRASH_DUMP
> > > >>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
> > > >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > > >> index 1c0f3e02f731..c55cee290bbb 100644
> > > >> --- a/arch/arm64/mm/mmu.c
> > > >> +++ b/arch/arm64/mm/mmu.c
> > > >> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
> > > >>  	 */
> > > >>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
> > > >>  #ifdef CONFIG_KEXEC_CORE
> > > >> +	if (crashk_low_res.end)
> > > >> +		memblock_mark_nomap(crashk_low_res.start,
> > > >> +				    resource_size(&crashk_low_res));
> > > >> +
> > > >>  	if (crashk_res.end)
> > > >>  		memblock_mark_nomap(crashk_res.start,
> > > >>  				    resource_size(&crashk_res));
> > > >> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> > > >> index d39892bdb9ae..cdef7d8c91a6 100644
> > > >> --- a/kernel/crash_core.c
> > > >> +++ b/kernel/crash_core.c
> > > >> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
> > > >>  
> > > >>  int __init reserve_crashkernel_low(void)
> > > >>  {
> > > >> -#ifdef CONFIG_X86_64
> > > >> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
> > > > Not very sure if a CONFIG_64BIT checking is better.
> > > If doing like this, there may be some compiling errors for other 64-bit kernel, such as mips.
> > > >
> > > >>  	unsigned long long base, low_base = 0, low_size = 0;
> > > >>  	unsigned long low_mem_limit;
> > > >>  	int ret;
> > > >> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
> > > >>  
> > > >>  	crashk_low_res.start = low_base;
> > > >>  	crashk_low_res.end   = low_base + low_size - 1;
> > > >> +#ifdef CONFIG_X86_64
> > > >>  	insert_resource(&iomem_resource, &crashk_low_res);
> > > >> +#endif
> > > >>  #endif
> > > >>  	return 0;
> > > >>  }
> > > >>  
> > > >> -#ifdef CONFIG_X86
> > > >> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
> > > > Should we make this weak default so that we can remove the ARCH config?
> > > The same as above, some arch may not support kdump, in that case,  compiling errors occur.
> > 
> > OK, not sure if other people have better idea, oterwise, we can leave with it. 
> > Thanks for telling.
> 
> I think it would be better to have CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL
> in arch/Kconfig and select this by X86 and ARM64.
> 
> Since reserve_crashkernel() implementations are quite similart on other
> architectures as well, we can have more users of this later.

Yes, this sounds like a nice way.
chenzhou Nov. 12, 2020, 1:11 p.m. UTC | #6
On 2020/11/12 16:36, Baoquan He wrote:
> On 11/12/20 at 10:25am, Mike Rapoport wrote:
>> On Wed, Nov 11, 2020 at 09:54:48PM +0800, Baoquan He wrote:
>>> On 11/11/20 at 09:27pm, chenzhou wrote:
>>>> Hi Baoquan,
>>> ...
>>>>>>  #ifdef CONFIG_CRASH_DUMP
>>>>>>  static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
>>>>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>>>>> index 1c0f3e02f731..c55cee290bbb 100644
>>>>>> --- a/arch/arm64/mm/mmu.c
>>>>>> +++ b/arch/arm64/mm/mmu.c
>>>>>> @@ -488,6 +488,10 @@ static void __init map_mem(pgd_t *pgdp)
>>>>>>  	 */
>>>>>>  	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
>>>>>>  #ifdef CONFIG_KEXEC_CORE
>>>>>> +	if (crashk_low_res.end)
>>>>>> +		memblock_mark_nomap(crashk_low_res.start,
>>>>>> +				    resource_size(&crashk_low_res));
>>>>>> +
>>>>>>  	if (crashk_res.end)
>>>>>>  		memblock_mark_nomap(crashk_res.start,
>>>>>>  				    resource_size(&crashk_res));
>>>>>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>>>>>> index d39892bdb9ae..cdef7d8c91a6 100644
>>>>>> --- a/kernel/crash_core.c
>>>>>> +++ b/kernel/crash_core.c
>>>>>> @@ -321,7 +321,7 @@ int __init parse_crashkernel_low(char *cmdline,
>>>>>>  
>>>>>>  int __init reserve_crashkernel_low(void)
>>>>>>  {
>>>>>> -#ifdef CONFIG_X86_64
>>>>>> +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
>>>>> Not very sure if a CONFIG_64BIT checking is better.
>>>> If doing like this, there may be some compiling errors for other 64-bit kernel, such as mips.
>>>>>>  	unsigned long long base, low_base = 0, low_size = 0;
>>>>>>  	unsigned long low_mem_limit;
>>>>>>  	int ret;
>>>>>> @@ -362,12 +362,14 @@ int __init reserve_crashkernel_low(void)
>>>>>>  
>>>>>>  	crashk_low_res.start = low_base;
>>>>>>  	crashk_low_res.end   = low_base + low_size - 1;
>>>>>> +#ifdef CONFIG_X86_64
>>>>>>  	insert_resource(&iomem_resource, &crashk_low_res);
>>>>>> +#endif
>>>>>>  #endif
>>>>>>  	return 0;
>>>>>>  }
>>>>>>  
>>>>>> -#ifdef CONFIG_X86
>>>>>> +#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
>>>>> Should we make this weak default so that we can remove the ARCH config?
>>>> The same as above, some arch may not support kdump, in that case,  compiling errors occur.
>>> OK, not sure if other people have better idea, oterwise, we can leave with it. 
>>> Thanks for telling.
>> I think it would be better to have CONFIG_ARCH_WANT_RESERVE_CRASH_KERNEL
>> in arch/Kconfig and select this by X86 and ARM64.
>>
>> Since reserve_crashkernel() implementations are quite similart on other
>> architectures as well, we can have more users of this later.
> Yes, this sounds like a nice way.
I will think about this in next version.

Thanks,
Chen Zhou
>
> .
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 402d208265a3..79909ae5e22e 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -28,7 +28,12 @@ 
 /* 2M alignment for crash kernel regions */
 #define CRASH_ALIGN	SZ_2M
 
+#ifdef CONFIG_ZONE_DMA
+#define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
+#else
 #define CRASH_ADDR_LOW_MAX	arm64_dma32_phys_limit
+#endif
+
 #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
 
 #ifndef __ASSEMBLY__
@@ -96,6 +101,10 @@  static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+extern void __init reserve_crashkernel(void);
+#endif
+
 #ifdef CONFIG_KEXEC_FILE
 #define ARCH_HAS_KIMAGE_ARCH
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 133257ffd859..6aff30de8f47 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -238,7 +238,18 @@  static void __init request_standard_resources(void)
 		    kernel_data.end <= res->end)
 			request_resource(res, &kernel_data);
 #ifdef CONFIG_KEXEC_CORE
-		/* Userspace will find "Crash kernel" region in /proc/iomem. */
+		/*
+		 * Userspace will find "Crash kernel" or "Crash kernel (low)"
+		 * region in /proc/iomem.
+		 * In order to distinct from the high region and make no effect
+		 * to the use of existing kexec-tools, rename the low region as
+		 * "Crash kernel (low)".
+		 */
+		if (crashk_low_res.end && crashk_low_res.start >= res->start &&
+				crashk_low_res.end <= res->end) {
+			crashk_low_res.name = "Crash kernel (low)";
+			request_resource(res, &crashk_low_res);
+		}
 		if (crashk_res.end && crashk_res.start >= res->start &&
 		    crashk_res.end <= res->end)
 			request_resource(res, &crashk_res);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a07fd8e1f926..888c4f7eadc3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -34,6 +34,7 @@ 
 #include <asm/fixmap.h>
 #include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
+#include <asm/kexec.h>
 #include <asm/memory.h>
 #include <asm/numa.h>
 #include <asm/sections.h>
@@ -62,66 +63,11 @@  EXPORT_SYMBOL(memstart_addr);
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
 phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
-#ifdef CONFIG_KEXEC_CORE
-/*
- * reserve_crashkernel() - reserves memory for crash kernel
- *
- * This function reserves memory area given in "crashkernel=" kernel command
- * line parameter. The memory reserved is used by dump capture kernel when
- * primary kernel is crashing.
- */
-static void __init reserve_crashkernel(void)
-{
-	unsigned long long crash_base, crash_size;
-	int ret;
-
-	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-				&crash_size, &crash_base);
-	/* no crashkernel= or invalid value specified */
-	if (ret || !crash_size)
-		return;
-
-	crash_size = PAGE_ALIGN(crash_size);
-
-	if (crash_base == 0) {
-		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(CRASH_ALIGN, CRASH_ADDR_LOW_MAX,
-				crash_size, CRASH_ALIGN);
-		if (crash_base == 0) {
-			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
-				crash_size);
-			return;
-		}
-	} else {
-		/* User specifies base address explicitly. */
-		if (!memblock_is_region_memory(crash_base, crash_size)) {
-			pr_warn("cannot reserve crashkernel: region is not memory\n");
-			return;
-		}
-
-		if (memblock_is_region_reserved(crash_base, crash_size)) {
-			pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
-			return;
-		}
-
-		if (!IS_ALIGNED(crash_base, CRASH_ALIGN)) {
-			pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
-			return;
-		}
-	}
-	memblock_reserve(crash_base, crash_size);
-
-	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
-		crash_base, crash_base + crash_size, crash_size >> 20);
-
-	crashk_res.start = crash_base;
-	crashk_res.end = crash_base + crash_size - 1;
-}
-#else
+#ifndef CONFIG_KEXEC_CORE
 static void __init reserve_crashkernel(void)
 {
 }
-#endif /* CONFIG_KEXEC_CORE */
+#endif
 
 #ifdef CONFIG_CRASH_DUMP
 static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1c0f3e02f731..c55cee290bbb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -488,6 +488,10 @@  static void __init map_mem(pgd_t *pgdp)
 	 */
 	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
 #ifdef CONFIG_KEXEC_CORE
+	if (crashk_low_res.end)
+		memblock_mark_nomap(crashk_low_res.start,
+				    resource_size(&crashk_low_res));
+
 	if (crashk_res.end)
 		memblock_mark_nomap(crashk_res.start,
 				    resource_size(&crashk_res));
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index d39892bdb9ae..cdef7d8c91a6 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -321,7 +321,7 @@  int __init parse_crashkernel_low(char *cmdline,
 
 int __init reserve_crashkernel_low(void)
 {
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
 	unsigned long long base, low_base = 0, low_size = 0;
 	unsigned long low_mem_limit;
 	int ret;
@@ -362,12 +362,14 @@  int __init reserve_crashkernel_low(void)
 
 	crashk_low_res.start = low_base;
 	crashk_low_res.end   = low_base + low_size - 1;
+#ifdef CONFIG_X86_64
 	insert_resource(&iomem_resource, &crashk_low_res);
+#endif
 #endif
 	return 0;
 }
 
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
 #ifdef CONFIG_KEXEC_CORE
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
@@ -453,7 +455,9 @@  void __init reserve_crashkernel(void)
 
 	crashk_res.start = crash_base;
 	crashk_res.end   = crash_base + crash_size - 1;
+#ifdef CONFIG_X86
 	insert_resource(&iomem_resource, &crashk_res);
+#endif
 }
 #endif /* CONFIG_KEXEC_CORE */
 #endif