diff mbox series

[v21,3/5] arm64: kdump: reimplement crashkernel=X

Message ID 20220227030717.1464-4-thunder.leizhen@huawei.com (mailing list archive)
State New, archived
Headers show
Series support reserving crashkernel above 4G on arm64 kdump | expand

Commit Message

Leizhen (ThunderTown) Feb. 27, 2022, 3:07 a.m. UTC
From: Chen Zhou <chenzhou10@huawei.com>

There are the following issues in arm64 kdump:
1. We use crashkernel=X to reserve crashkernel memory below 4G, which
will fail when there is not enough low memory.
2. If the crashkernel memory is reserved above 4G, the crash dump
kernel will fail to boot because there is no low memory available
for allocation.

To solve these issues, change the behavior of crashkernel=X and
introduce crashkernel=X,[high,low]. crashkernel=X tries a low allocation
in the DMA zone, and falls back to a high allocation if that fails.
We can also use "crashkernel=X,high" to select a region above the DMA zone,
which also tries to allocate at least 256M in the DMA zone automatically.
"crashkernel=Y,low" can be used to allocate low memory of a specified size.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
 arch/arm64/kernel/machine_kexec.c      |   9 ++-
 arch/arm64/kernel/machine_kexec_file.c |  12 ++-
 arch/arm64/mm/init.c                   | 106 +++++++++++++++++++++++--
 3 files changed, 115 insertions(+), 12 deletions(-)

Comments

Baoquan He March 16, 2022, 12:11 p.m. UTC | #1
On 02/27/22 at 11:07am, Zhen Lei wrote:
> From: Chen Zhou <chenzhou10@huawei.com>
> 
> There are following issues in arm64 kdump:
> 1. We use crashkernel=X to reserve crashkernel below 4G, which
> will fail when there is no enough low memory.
> 2. If reserving crashkernel above 4G, in this case, crash dump
> kernel will boot failure because there is no low memory available
> for allocation.
> 
> To solve these issues, change the behavior of crashkernel=X and
> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
> in DMA zone, and fall back to high allocation if it fails.
> We can also use "crashkernel=X,high" to select a region above DMA zone,
> which also tries to allocate at least 256M in DMA zone automatically.
> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> ---
>  arch/arm64/kernel/machine_kexec.c      |   9 ++-
>  arch/arm64/kernel/machine_kexec_file.c |  12 ++-
>  arch/arm64/mm/init.c                   | 106 +++++++++++++++++++++++--
>  3 files changed, 115 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> index e16b248699d5c3c..19c2d487cb08feb 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
>  
>  	/* in reserved memory? */
>  	addr = __pfn_to_phys(pfn);
> -	if ((addr < crashk_res.start) || (crashk_res.end < addr))
> -		return false;
> +	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
> +		if (!crashk_low_res.end)
> +			return false;
> +
> +		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
> +			return false;
> +	}
>  
>  	if (!kexec_crash_image)
>  		return true;
> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index 59c648d51848886..889951291cc0f9c 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
>  
>  	/* Exclude crashkernel region */
>  	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
> +	if (ret)
> +		goto out;
> +
> +	if (crashk_low_res.end) {
> +		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
> +		if (ret)
> +			goto out;
> +	}
>  
> -	if (!ret)
> -		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
> +	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
>  
> +out:
>  	kfree(cmem);
>  	return ret;
>  }
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 90f276d46b93bc6..30ae6638ff54c47 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr);
>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  
>  #ifdef CONFIG_KEXEC_CORE
> +/* Current arm64 boot protocol requires 2MB alignment */
> +#define CRASH_ALIGN			SZ_2M
> +
> +#define CRASH_ADDR_LOW_MAX		arm64_dma_phys_limit
> +#define CRASH_ADDR_HIGH_MAX		memblock.current_limit
> +
> +/*
> + * This is an empirical value in x86_64 and taken here directly. Please
> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more
> + * details.
> + */
> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE	\
> +	max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20)
> +
> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> +{
> +	unsigned long long low_base;
> +
> +	/* passed with crashkernel=0,low ? */
> +	if (!low_size)
> +		return 0;
> +
> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> +	if (!low_base) {
> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> +		return -ENOMEM;
> +	}
> +
> +	pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
> +		low_base, low_base + low_size, low_size >> 20);
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +	insert_resource(&iomem_resource, &crashk_low_res);
> +
> +	return 0;
> +}
> +
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
>   *
> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
> -	unsigned long long crash_max = arm64_dma_phys_limit;
> +	unsigned long long crash_low_size;
> +	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	int ret;

Even though reverse xmas tree style is not enforced, this 'int ret;' is
really annoying to look at. Maybe move it down two lines.

> +	bool fixed_base, high = false;
> +	char *cmdline = boot_command_line;
>  
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +	/* crashkernel=X[@offset] */
> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>  				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +
> +		/* crashkernel=Y,low */
> +		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
> +		if (ret == -ENOENT)
> +			/*
> +			 * crashkernel=Y,low is not specified explicitly, use
> +			 * default size automatically.
> +			 */
> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> +		else if (ret)
> +			/* crashkernel=Y,low is specified but Y is invalid */
> +			return;
> +
> +		/* Mark crashkernel=X,high is specified */
> +		high = true;
> +		crash_max = CRASH_ADDR_HIGH_MAX;
> +	}
>  
> +	fixed_base = !!crash_base;
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	/* User specifies base address explicitly. */
This is over commenting, can't see why it's needed.
> -	if (crash_base)
> +	if (fixed_base)
>  		crash_max = crash_base + crash_size;

Hi leizhen,

I made some changes to reserve_crashkernel(); commenting inline may be slow.
Please check and consider whether they can be taken.

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 30ae6638ff54..f96351da1e3e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
  * This function reserves memory area given in "crashkernel=" kernel command
  * line parameter. The memory reserved is used by dump capture kernel when
  * primary kernel is crashing.
+ *
+ * NOTE: Reservation of crashkernel,low is special since its existence
+ * is not independent, need rely on the existence of crashkernel,high.
+ * Hence there are different cases for crashkernel,low reservation:
+ * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
+ * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
+ *    take the default crashkernel,low value;
+ * 3) crashkernel=X is specified, while fallback to get a memory region
+ *    in high memory, take the default crashkernel,low value;
+ * 4) crashkernel='invalid value',low is specified, failed the whole
+ *    crashkernel reservation and bail out.
  */
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
 	unsigned long long crash_low_size;
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
-	int ret;
 	bool fixed_base, high = false;
 	char *cmdline = boot_command_line;
+	int ret;
 
 	/* crashkernel=X[@offset] */
 	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
 				&crash_size, &crash_base);
 	if (ret || !crash_size) {
-		/* crashkernel=X,high */
 		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
 		if (ret || !crash_size)
 			return;
 
-		/* crashkernel=Y,low */
 		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
 		if (ret == -ENOENT)
-			/*
-			 * crashkernel=Y,low is not specified explicitly, use
-			 * default size automatically.
-			 */
+			/* case #2 of crashkernel,low reservation */
 			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 		else if (ret)
-			/* crashkernel=Y,low is specified but Y is invalid */
+			/* case #4 of crashkernel,low reservation */
 			return;
 
-		/* Mark crashkernel=X,high is specified */
 		high = true;
 		crash_max = CRASH_ADDR_HIGH_MAX;
 	}
@@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
 	fixed_base = !!crash_base;
 	crash_size = PAGE_ALIGN(crash_size);
 
-	/* User specifies base address explicitly. */
 	if (fixed_base)
 		crash_max = crash_base + crash_size;
 
@@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
 	}
 
 	if (crash_base >= SZ_4G) {
-		/*
-		 * For case crashkernel=X, low memory is not enough and fall
-		 * back to reserve specified size of memory above 4G, try to
-		 * allocate minimum required memory below 4G again.
-		 */
+		/* case #3 of crashkernel,low reservation */
 		if (!high)
 			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 

>  
> -	/* Current arm64 boot protocol requires 2MB alignment */
> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
> +retry:
> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>  					       crash_base, crash_max);
>  	if (!crash_base) {
> +		/*
> +		 * Attempt to fully allocate low memory failed, fall back
> +		 * to high memory, the minimum required low memory will be
> +		 * reserved later.
> +		 */
> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> +			crash_max = CRASH_ADDR_HIGH_MAX;
> +			goto retry;
> +		}
> +
>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>  			crash_size);
>  		return;
>  	}
>  
> +	if (crash_base >= SZ_4G) {
> +		/*
> +		 * For case crashkernel=X, low memory is not enough and fall
> +		 * back to reserve specified size of memory above 4G, try to
> +		 * allocate minimum required memory below 4G again.
> +		 */
> +		if (!high)
> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> +
> +		if (reserve_crashkernel_low(crash_low_size)) {
> +			memblock_phys_free(crash_base, crash_size);
> +			return;
> +		}
> +	}
> +
>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>  		crash_base, crash_base + crash_size, crash_size >> 20);
>  
> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>  	 * map. Inform kmemleak so that it won't try to access it.
>  	 */
>  	kmemleak_ignore_phys(crash_base);
> +	if (crashk_low_res.end)
> +		kmemleak_ignore_phys(crashk_low_res.start);
> +
>  	crashk_res.start = crash_base;
>  	crashk_res.end = crash_base + crash_size - 1;
>  	insert_resource(&iomem_resource, &crashk_res);
> -- 
> 2.25.1
>
Leizhen (ThunderTown) March 16, 2022, 1:11 p.m. UTC | #2
On 2022/3/16 20:11, Baoquan He wrote:
> On 02/27/22 at 11:07am, Zhen Lei wrote:
>> From: Chen Zhou <chenzhou10@huawei.com>
>>
>> There are following issues in arm64 kdump:
>> 1. We use crashkernel=X to reserve crashkernel below 4G, which
>> will fail when there is no enough low memory.
>> 2. If reserving crashkernel above 4G, in this case, crash dump
>> kernel will boot failure because there is no low memory available
>> for allocation.
>>
>> To solve these issues, change the behavior of crashkernel=X and
>> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
>> in DMA zone, and fall back to high allocation if it fails.
>> We can also use "crashkernel=X,high" to select a region above DMA zone,
>> which also tries to allocate at least 256M in DMA zone automatically.
>> "crashkernel=Y,low" can be used to allocate specified size low memory.
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>> ---
>>  arch/arm64/kernel/machine_kexec.c      |   9 ++-
>>  arch/arm64/kernel/machine_kexec_file.c |  12 ++-
>>  arch/arm64/mm/init.c                   | 106 +++++++++++++++++++++++--
>>  3 files changed, 115 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>> index e16b248699d5c3c..19c2d487cb08feb 100644
>> --- a/arch/arm64/kernel/machine_kexec.c
>> +++ b/arch/arm64/kernel/machine_kexec.c
>> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
>>  
>>  	/* in reserved memory? */
>>  	addr = __pfn_to_phys(pfn);
>> -	if ((addr < crashk_res.start) || (crashk_res.end < addr))
>> -		return false;
>> +	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
>> +		if (!crashk_low_res.end)
>> +			return false;
>> +
>> +		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
>> +			return false;
>> +	}
>>  
>>  	if (!kexec_crash_image)
>>  		return true;
>> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
>> index 59c648d51848886..889951291cc0f9c 100644
>> --- a/arch/arm64/kernel/machine_kexec_file.c
>> +++ b/arch/arm64/kernel/machine_kexec_file.c
>> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
>>  
>>  	/* Exclude crashkernel region */
>>  	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
>> +	if (ret)
>> +		goto out;
>> +
>> +	if (crashk_low_res.end) {
>> +		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
>> +		if (ret)
>> +			goto out;
>> +	}
>>  
>> -	if (!ret)
>> -		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
>> +	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
>>  
>> +out:
>>  	kfree(cmem);
>>  	return ret;
>>  }
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 90f276d46b93bc6..30ae6638ff54c47 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr);
>>  phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  
>>  #ifdef CONFIG_KEXEC_CORE
>> +/* Current arm64 boot protocol requires 2MB alignment */
>> +#define CRASH_ALIGN			SZ_2M
>> +
>> +#define CRASH_ADDR_LOW_MAX		arm64_dma_phys_limit
>> +#define CRASH_ADDR_HIGH_MAX		memblock.current_limit
>> +
>> +/*
>> + * This is an empirical value in x86_64 and taken here directly. Please
>> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more
>> + * details.
>> + */
>> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE	\
>> +	max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20)
>> +
>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>> +{
>> +	unsigned long long low_base;
>> +
>> +	/* passed with crashkernel=0,low ? */
>> +	if (!low_size)
>> +		return 0;
>> +
>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>> +	if (!low_base) {
>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
>> +		low_base, low_base + low_size, low_size >> 20);
>> +
>> +	crashk_low_res.start = low_base;
>> +	crashk_low_res.end   = low_base + low_size - 1;
>> +	insert_resource(&iomem_resource, &crashk_low_res);
>> +
>> +	return 0;
>> +}
>> +
>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
>>   *
>> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  static void __init reserve_crashkernel(void)
>>  {
>>  	unsigned long long crash_base, crash_size;
>> -	unsigned long long crash_max = arm64_dma_phys_limit;
>> +	unsigned long long crash_low_size;
>> +	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>  	int ret;
> 
> Even though reverse xmas tree style is not enforced, this 'int ret;' is
> really annoying to look at. Maybe move it down two lines.
> 
>> +	bool fixed_base, high = false;
>> +	char *cmdline = boot_command_line;
>>  
>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>> +	/* crashkernel=X[@offset] */
>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>  				&crash_size, &crash_base);
>> -	/* no crashkernel= or invalid value specified */
>> -	if (ret || !crash_size)
>> -		return;
>> +	if (ret || !crash_size) {
>> +		/* crashkernel=X,high */
>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>> +		if (ret || !crash_size)
>> +			return;
>> +
>> +		/* crashkernel=Y,low */
>> +		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
>> +		if (ret == -ENOENT)
>> +			/*
>> +			 * crashkernel=Y,low is not specified explicitly, use
>> +			 * default size automatically.
>> +			 */
>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>> +		else if (ret)
>> +			/* crashkernel=Y,low is specified but Y is invalid */
>> +			return;
>> +
>> +		/* Mark crashkernel=X,high is specified */
>> +		high = true;
>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>> +	}
>>  
>> +	fixed_base = !!crash_base;
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	/* User specifies base address explicitly. */
> This is over commenting, can't see why it's needed.
>> -	if (crash_base)
>> +	if (fixed_base)
>>  		crash_max = crash_base + crash_size;
> 
> Hi leizhen,
> 
> I made change on reserve_crashkenrel(), inline comment may be slow.
> Please check and consider if they can be taken.

That's great. Thank you very much.

> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 30ae6638ff54..f96351da1e3e 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>   * This function reserves memory area given in "crashkernel=" kernel command
>   * line parameter. The memory reserved is used by dump capture kernel when
>   * primary kernel is crashing.
> + *
> + * NOTE: Reservation of crashkernel,low is special since its existence
> + * is not independent, need rely on the existence of crashkernel,high.
> + * Hence there are different cases for crashkernel,low reservation:
> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
> + *    take the default crashkernel,low value;
> + * 3) crashkernel=X is specified, while fallback to get a memory region
> + *    in high memory, take the default crashkernel,low value;
> + * 4) crashkernel='invalid value',low is specified, failed the whole
> + *    crashkernel reservation and bail out.
>   */
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
>  	unsigned long long crash_low_size;
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> -	int ret;
>  	bool fixed_base, high = false;
>  	char *cmdline = boot_command_line;
> +	int ret;
>  
>  	/* crashkernel=X[@offset] */
>  	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>  				&crash_size, &crash_base);
>  	if (ret || !crash_size) {
> -		/* crashkernel=X,high */
>  		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>  		if (ret || !crash_size)
>  			return;
>  
> -		/* crashkernel=Y,low */
>  		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
>  		if (ret == -ENOENT)
> -			/*
> -			 * crashkernel=Y,low is not specified explicitly, use
> -			 * default size automatically.
> -			 */
> +			/* case #2 of crashkernel,low reservation */
>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>  		else if (ret)
> -			/* crashkernel=Y,low is specified but Y is invalid */
> +			/* case #4 of crashkernel,low reservation */
>  			return;
>  
> -		/* Mark crashkernel=X,high is specified */
>  		high = true;
>  		crash_max = CRASH_ADDR_HIGH_MAX;
>  	}
> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
>  	fixed_base = !!crash_base;
>  	crash_size = PAGE_ALIGN(crash_size);
>  
> -	/* User specifies base address explicitly. */
>  	if (fixed_base)
>  		crash_max = crash_base + crash_size;
>  
> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
>  	}
>  
>  	if (crash_base >= SZ_4G) {
> -		/*
> -		 * For case crashkernel=X, low memory is not enough and fall
> -		 * back to reserve specified size of memory above 4G, try to
> -		 * allocate minimum required memory below 4G again.
> -		 */
> +		/* case #3 of crashkernel,low reservation */
>  		if (!high)
>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>  
> 
>>  
>> -	/* Current arm64 boot protocol requires 2MB alignment */
>> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
>> +retry:
>> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>  					       crash_base, crash_max);
>>  	if (!crash_base) {
>> +		/*
>> +		 * Attempt to fully allocate low memory failed, fall back
>> +		 * to high memory, the minimum required low memory will be
>> +		 * reserved later.
>> +		 */
>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>> +			goto retry;
>> +		}
>> +
>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>  			crash_size);
>>  		return;
>>  	}
>>  
>> +	if (crash_base >= SZ_4G) {
>> +		/*
>> +		 * For case crashkernel=X, low memory is not enough and fall
>> +		 * back to reserve specified size of memory above 4G, try to
>> +		 * allocate minimum required memory below 4G again.
>> +		 */
>> +		if (!high)
>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>> +
>> +		if (reserve_crashkernel_low(crash_low_size)) {
>> +			memblock_phys_free(crash_base, crash_size);
>> +			return;
>> +		}
>> +	}
>> +
>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>  
>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>>  	 * map. Inform kmemleak so that it won't try to access it.
>>  	 */
>>  	kmemleak_ignore_phys(crash_base);
>> +	if (crashk_low_res.end)
>> +		kmemleak_ignore_phys(crashk_low_res.start);
>> +
>>  	crashk_res.start = crash_base;
>>  	crashk_res.end = crash_base + crash_size - 1;
>>  	insert_resource(&iomem_resource, &crashk_res);
>> -- 
>> 2.25.1
>>
> 
> .
>
Baoquan He March 17, 2022, 2:36 a.m. UTC | #3
On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/3/16 20:11, Baoquan He wrote:
> > On 02/27/22 at 11:07am, Zhen Lei wrote:
...... 

> > Hi leizhen,
> > 
> > I made change on reserve_crashkenrel(), inline comment may be slow.
> > Please check and consider if they can be taken.
> 
> That's great. Thank you very much.
> 
> > 
> > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> > index 30ae6638ff54..f96351da1e3e 100644
> > --- a/arch/arm64/mm/init.c
> > +++ b/arch/arm64/mm/init.c
> > @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
> >   * This function reserves memory area given in "crashkernel=" kernel command
> >   * line parameter. The memory reserved is used by dump capture kernel when
> >   * primary kernel is crashing.
> > + *
> > + * NOTE: Reservation of crashkernel,low is special since its existence
> > + * is not independent, need rely on the existence of crashkernel,high.
> > + * Hence there are different cases for crashkernel,low reservation:

Consider updating the 3rd line as below:

 * NOTE: Reservation of crashkernel,low is special since its existence
 * is not independent, need rely on the existence of crashkernel,high.
 * Here, four cases of crashkernel,low reservation are summarized: 

> > + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
> > + * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
> > + *    take the default crashkernel,low value;
> > + * 3) crashkernel=X is specified, while fallback to get a memory region
> > + *    in high memory, take the default crashkernel,low value;
> > + * 4) crashkernel='invalid value',low is specified, failed the whole
> > + *    crashkernel reservation and bail out.
> >   */
> >  static void __init reserve_crashkernel(void)
> >  {
> >  	unsigned long long crash_base, crash_size;
> >  	unsigned long long crash_low_size;
> >  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> > -	int ret;
> >  	bool fixed_base, high = false;
> >  	char *cmdline = boot_command_line;
> > +	int ret;
> >  
> >  	/* crashkernel=X[@offset] */
> >  	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
> >  				&crash_size, &crash_base);
> >  	if (ret || !crash_size) {
> > -		/* crashkernel=X,high */
> >  		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> >  		if (ret || !crash_size)
> >  			return;
> >  
> > -		/* crashkernel=Y,low */
> >  		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
> >  		if (ret == -ENOENT)
> > -			/*
> > -			 * crashkernel=Y,low is not specified explicitly, use
> > -			 * default size automatically.
> > -			 */
> > +			/* case #2 of crashkernel,low reservation */
> >  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >  		else if (ret)
> > -			/* crashkernel=Y,low is specified but Y is invalid */
> > +			/* case #4 of crashkernel,low reservation */
> >  			return;
> >  
> > -		/* Mark crashkernel=X,high is specified */
> >  		high = true;
> >  		crash_max = CRASH_ADDR_HIGH_MAX;
> >  	}
> > @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
> >  	fixed_base = !!crash_base;
> >  	crash_size = PAGE_ALIGN(crash_size);
> >  
> > -	/* User specifies base address explicitly. */
> >  	if (fixed_base)
> >  		crash_max = crash_base + crash_size;
> >  
> > @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
> >  	}
> >  
> >  	if (crash_base >= SZ_4G) {
> > -		/*
> > -		 * For case crashkernel=X, low memory is not enough and fall
> > -		 * back to reserve specified size of memory above 4G, try to
> > -		 * allocate minimum required memory below 4G again.
> > -		 */
> > +		/* case #3 of crashkernel,low reservation */
> >  		if (!high)
> >  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >  
> > 
> >>  
> >> -	/* Current arm64 boot protocol requires 2MB alignment */
> >> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
> >> +retry:
> >> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> >>  					       crash_base, crash_max);
> >>  	if (!crash_base) {
> >> +		/*
> >> +		 * Attempt to fully allocate low memory failed, fall back
> >> +		 * to high memory, the minimum required low memory will be
> >> +		 * reserved later.
> >> +		 */
> >> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> >> +			crash_max = CRASH_ADDR_HIGH_MAX;
> >> +			goto retry;
> >> +		}
> >> +
> >>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> >>  			crash_size);
> >>  		return;
> >>  	}
> >>  
> >> +	if (crash_base >= SZ_4G) {
> >> +		/*
> >> +		 * For case crashkernel=X, low memory is not enough and fall
> >> +		 * back to reserve specified size of memory above 4G, try to
> >> +		 * allocate minimum required memory below 4G again.
> >> +		 */
> >> +		if (!high)
> >> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >> +
> >> +		if (reserve_crashkernel_low(crash_low_size)) {
> >> +			memblock_phys_free(crash_base, crash_size);
> >> +			return;
> >> +		}
> >> +	}
> >> +
> >>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> >>  		crash_base, crash_base + crash_size, crash_size >> 20);
> >>  
> >> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
> >>  	 * map. Inform kmemleak so that it won't try to access it.
> >>  	 */
> >>  	kmemleak_ignore_phys(crash_base);
> >> +	if (crashk_low_res.end)
> >> +		kmemleak_ignore_phys(crashk_low_res.start);
> >> +
> >>  	crashk_res.start = crash_base;
> >>  	crashk_res.end = crash_base + crash_size - 1;
> >>  	insert_resource(&iomem_resource, &crashk_res);
> >> -- 
> >> 2.25.1
> >>
> > 
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei
>
Baoquan He March 17, 2022, 2:38 a.m. UTC | #4
On 02/27/22 at 11:07am, Zhen Lei wrote:
> From: Chen Zhou <chenzhou10@huawei.com>
> 
> There are following issues in arm64 kdump:
> 1. We use crashkernel=X to reserve crashkernel below 4G, which
> will fail when there is no enough low memory.
> 2. If reserving crashkernel above 4G, in this case, crash dump
> kernel will boot failure because there is no low memory available
              ~~ change it to "get boot failure" or "fail to boot"
> for allocation.
> 
> To solve these issues, change the behavior of crashkernel=X and
> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
> in DMA zone, and fall back to high allocation if it fails.
> We can also use "crashkernel=X,high" to select a region above DMA zone,
> which also tries to allocate at least 256M in DMA zone automatically.
> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Leizhen (ThunderTown) March 17, 2022, 3:19 a.m. UTC | #5
On 2022/3/17 10:36, Baoquan He wrote:
> On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2022/3/16 20:11, Baoquan He wrote:
>>> On 02/27/22 at 11:07am, Zhen Lei wrote:
> ...... 
> 
>>> Hi leizhen,
>>>
>>> I made change on reserve_crashkenrel(), inline comment may be slow.
>>> Please check and consider if they can be taken.
>>
>> That's great. Thank you very much.
>>
>>>
>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>> index 30ae6638ff54..f96351da1e3e 100644
>>> --- a/arch/arm64/mm/init.c
>>> +++ b/arch/arm64/mm/init.c
>>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>   * This function reserves memory area given in "crashkernel=" kernel command
>>>   * line parameter. The memory reserved is used by dump capture kernel when
>>>   * primary kernel is crashing.
>>> + *
>>> + * NOTE: Reservation of crashkernel,low is special since its existence
>>> + * is not independent, need rely on the existence of crashkernel,high.
>>> + * Hence there are different cases for crashkernel,low reservation:
> 
> Considering to update the 3rd line as below:
> 
>  * NOTE: Reservation of crashkernel,low is special since its existence
>  * is not independent, need rely on the existence of crashkernel,high.
>  * Here, four cases of crashkernel,low reservation are summarized: 

OK. How about changing "crashkernel,low" to "crashkernel low memory"?
"crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" look very similar
and may confuse the reader.

> 
>>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
>>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
>>> + *    take the default crashkernel,low value;
>>> + * 3) crashkernel=X is specified, while fallback to get a memory region
>>> + *    in high memory, take the default crashkernel,low value;
>>> + * 4) crashkernel='invalid value',low is specified, failed the whole
>>> + *    crashkernel reservation and bail out.
>>>   */
>>>  static void __init reserve_crashkernel(void)
>>>  {
>>>  	unsigned long long crash_base, crash_size;
>>>  	unsigned long long crash_low_size;
>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>> -	int ret;
>>>  	bool fixed_base, high = false;
>>>  	char *cmdline = boot_command_line;
>>> +	int ret;
>>>  
>>>  	/* crashkernel=X[@offset] */
>>>  	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>>  				&crash_size, &crash_base);
>>>  	if (ret || !crash_size) {
>>> -		/* crashkernel=X,high */
>>>  		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>>>  		if (ret || !crash_size)
>>>  			return;
>>>  
>>> -		/* crashkernel=Y,low */
>>>  		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
>>>  		if (ret == -ENOENT)
>>> -			/*
>>> -			 * crashkernel=Y,low is not specified explicitly, use
>>> -			 * default size automatically.
>>> -			 */
>>> +			/* case #2 of crashkernel,low reservation */
>>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>  		else if (ret)
>>> -			/* crashkernel=Y,low is specified but Y is invalid */
>>> +			/* case #4 of crashkernel,low reservation */
>>>  			return;
>>>  
>>> -		/* Mark crashkernel=X,high is specified */
>>>  		high = true;
>>>  		crash_max = CRASH_ADDR_HIGH_MAX;
>>>  	}
>>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
>>>  	fixed_base = !!crash_base;
>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>  
>>> -	/* User specifies base address explicitly. */
>>>  	if (fixed_base)
>>>  		crash_max = crash_base + crash_size;
>>>  
>>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
>>>  	}
>>>  
>>>  	if (crash_base >= SZ_4G) {
>>> -		/*
>>> -		 * For case crashkernel=X, low memory is not enough and fall
>>> -		 * back to reserve specified size of memory above 4G, try to
>>> -		 * allocate minimum required memory below 4G again.
>>> -		 */
>>> +		/* case #3 of crashkernel,low reservation */
>>>  		if (!high)
>>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>  
>>>
>>>>  
>>>> -	/* Current arm64 boot protocol requires 2MB alignment */
>>>> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
>>>> +retry:
>>>> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>>>  					       crash_base, crash_max);
>>>>  	if (!crash_base) {
>>>> +		/*
>>>> +		 * Attempt to fully allocate low memory failed, fall back
>>>> +		 * to high memory, the minimum required low memory will be
>>>> +		 * reserved later.
>>>> +		 */
>>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>>>> +			goto retry;
>>>> +		}
>>>> +
>>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>>>  			crash_size);
>>>>  		return;
>>>>  	}
>>>>  
>>>> +	if (crash_base >= SZ_4G) {
>>>> +		/*
>>>> +		 * For case crashkernel=X, low memory is not enough and fall
>>>> +		 * back to reserve specified size of memory above 4G, try to
>>>> +		 * allocate minimum required memory below 4G again.
>>>> +		 */
>>>> +		if (!high)
>>>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>> +
>>>> +		if (reserve_crashkernel_low(crash_low_size)) {
>>>> +			memblock_phys_free(crash_base, crash_size);
>>>> +			return;
>>>> +		}
>>>> +	}
>>>> +
>>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>>>  
>>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>>>>  	 * map. Inform kmemleak so that it won't try to access it.
>>>>  	 */
>>>>  	kmemleak_ignore_phys(crash_base);
>>>> +	if (crashk_low_res.end)
>>>> +		kmemleak_ignore_phys(crashk_low_res.start);
>>>> +
>>>>  	crashk_res.start = crash_base;
>>>>  	crashk_res.end = crash_base + crash_size - 1;
>>>>  	insert_resource(&iomem_resource, &crashk_res);
>>>> -- 
>>>> 2.25.1
>>>>
>>>
>>> .
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
>>
> 
> .
>
Leizhen (ThunderTown) March 17, 2022, 3:23 a.m. UTC | #6
On 2022/3/17 10:38, Baoquan He wrote:
> On 02/27/22 at 11:07am, Zhen Lei wrote:
>> From: Chen Zhou <chenzhou10@huawei.com>
>>
>> There are following issues in arm64 kdump:
>> 1. We use crashkernel=X to reserve crashkernel below 4G, which
>> will fail when there is no enough low memory.
>> 2. If reserving crashkernel above 4G, in this case, crash dump
>> kernel will boot failure because there is no low memory available
>               ~~ change it to "get boot failure" or "fail to boot"

OK. I'm going to use "fail to boot".

>> for allocation.
>>
>> To solve these issues, change the behavior of crashkernel=X and
>> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
>> in DMA zone, and fall back to high allocation if it fails.
>> We can also use "crashkernel=X,high" to select a region above DMA zone,
>> which also tries to allocate at least 256M in DMA zone automatically.
>> "crashkernel=Y,low" can be used to allocate specified size low memory.
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> 
> .
>
Baoquan He March 17, 2022, 3:47 a.m. UTC | #7
On 03/17/22 at 11:19am, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/3/17 10:36, Baoquan He wrote:
> > On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote:
> >>
> >>
> >> On 2022/3/16 20:11, Baoquan He wrote:
> >>> On 02/27/22 at 11:07am, Zhen Lei wrote:
> > ...... 
> > 
> >>> Hi leizhen,
> >>>
> >>> I made change on reserve_crashkenrel(), inline comment may be slow.
> >>> Please check and consider if they can be taken.
> >>
> >> That's great. Thank you very much.
> >>
> >>>
> >>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >>> index 30ae6638ff54..f96351da1e3e 100644
> >>> --- a/arch/arm64/mm/init.c
> >>> +++ b/arch/arm64/mm/init.c
> >>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
> >>>   * This function reserves memory area given in "crashkernel=" kernel command
> >>>   * line parameter. The memory reserved is used by dump capture kernel when
> >>>   * primary kernel is crashing.
> >>> + *
> >>> + * NOTE: Reservation of crashkernel,low is special since its existence
> >>> + * is not independent, need rely on the existence of crashkernel,high.
> >>> + * Hence there are different cases for crashkernel,low reservation:
> > 
> > Considering to update the 3rd line as below:
> > 
> >  * NOTE: Reservation of crashkernel,low is special since its existence
> >  * is not independent, need rely on the existence of crashkernel,high.
> >  * Here, four cases of crashkernel,low reservation are summarized: 
> 
> OK. How about change "crashkernel,low" to "crashkernel low memory"?
> "crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" are very similar,
> may dazzle the reader.

Fine by me. 'crashkernel low memory' is more formal; it just makes the sentence a
little longer. Please take whichever you think fits better.

> 
> > 
> >>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
> >>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
> >>> + *    take the default crashkernel,low value;
> >>> + * 3) crashkernel=X is specified, while fallback to get a memory region
> >>> + *    in high memory, take the default crashkernel,low value;
> >>> + * 4) crashkernel='invalid value',low is specified, failed the whole
> >>> + *    crashkernel reservation and bail out.
> >>>   */
> >>>  static void __init reserve_crashkernel(void)
> >>>  {
> >>>  	unsigned long long crash_base, crash_size;
> >>>  	unsigned long long crash_low_size;
> >>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> >>> -	int ret;
> >>>  	bool fixed_base, high = false;
> >>>  	char *cmdline = boot_command_line;
> >>> +	int ret;
> >>>  
> >>>  	/* crashkernel=X[@offset] */
> >>>  	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
> >>>  				&crash_size, &crash_base);
> >>>  	if (ret || !crash_size) {
> >>> -		/* crashkernel=X,high */
> >>>  		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> >>>  		if (ret || !crash_size)
> >>>  			return;
> >>>  
> >>> -		/* crashkernel=Y,low */
> >>>  		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
> >>>  		if (ret == -ENOENT)
> >>> -			/*
> >>> -			 * crashkernel=Y,low is not specified explicitly, use
> >>> -			 * default size automatically.
> >>> -			 */
> >>> +			/* case #2 of crashkernel,low reservation */
> >>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >>>  		else if (ret)
> >>> -			/* crashkernel=Y,low is specified but Y is invalid */
> >>> +			/* case #4 of crashkernel,low reservation */
> >>>  			return;
> >>>  
> >>> -		/* Mark crashkernel=X,high is specified */
> >>>  		high = true;
> >>>  		crash_max = CRASH_ADDR_HIGH_MAX;
> >>>  	}
> >>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
> >>>  	fixed_base = !!crash_base;
> >>>  	crash_size = PAGE_ALIGN(crash_size);
> >>>  
> >>> -	/* User specifies base address explicitly. */
> >>>  	if (fixed_base)
> >>>  		crash_max = crash_base + crash_size;
> >>>  
> >>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
> >>>  	}
> >>>  
> >>>  	if (crash_base >= SZ_4G) {
> >>> -		/*
> >>> -		 * For case crashkernel=X, low memory is not enough and fall
> >>> -		 * back to reserve specified size of memory above 4G, try to
> >>> -		 * allocate minimum required memory below 4G again.
> >>> -		 */
> >>> +		/* case #3 of crashkernel,low reservation */
> >>>  		if (!high)
> >>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >>>  
> >>>
> >>>>  
> >>>> -	/* Current arm64 boot protocol requires 2MB alignment */
> >>>> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
> >>>> +retry:
> >>>> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> >>>>  					       crash_base, crash_max);
> >>>>  	if (!crash_base) {
> >>>> +		/*
> >>>> +		 * Attempt to fully allocate low memory failed, fall back
> >>>> +		 * to high memory, the minimum required low memory will be
> >>>> +		 * reserved later.
> >>>> +		 */
> >>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> >>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
> >>>> +			goto retry;
> >>>> +		}
> >>>> +
> >>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> >>>>  			crash_size);
> >>>>  		return;
> >>>>  	}
> >>>>  
> >>>> +	if (crash_base >= SZ_4G) {
> >>>> +		/*
> >>>> +		 * For case crashkernel=X, low memory is not enough and fall
> >>>> +		 * back to reserve specified size of memory above 4G, try to
> >>>> +		 * allocate minimum required memory below 4G again.
> >>>> +		 */
> >>>> +		if (!high)
> >>>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >>>> +
> >>>> +		if (reserve_crashkernel_low(crash_low_size)) {
> >>>> +			memblock_phys_free(crash_base, crash_size);
> >>>> +			return;
> >>>> +		}
> >>>> +	}
> >>>> +
> >>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> >>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
> >>>>  
> >>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
> >>>>  	 * map. Inform kmemleak so that it won't try to access it.
> >>>>  	 */
> >>>>  	kmemleak_ignore_phys(crash_base);
> >>>> +	if (crashk_low_res.end)
> >>>> +		kmemleak_ignore_phys(crashk_low_res.start);
> >>>> +
> >>>>  	crashk_res.start = crash_base;
> >>>>  	crashk_res.end = crash_base + crash_size - 1;
> >>>>  	insert_resource(&iomem_resource, &crashk_res);
> >>>> -- 
> >>>> 2.25.1
> >>>>
> >>>
> >>> .
> >>>
> >>
> >> -- 
> >> Regards,
> >>   Zhen Lei
> >>
> > 
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei
>
Leizhen (ThunderTown) March 17, 2022, 7:30 a.m. UTC | #8
On 2022/3/17 11:47, Baoquan He wrote:
> On 03/17/22 at 11:19am, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2022/3/17 10:36, Baoquan He wrote:
>>> On 03/16/22 at 09:11pm, Leizhen (ThunderTown) wrote:
>>>>
>>>>
>>>> On 2022/3/16 20:11, Baoquan He wrote:
>>>>> On 02/27/22 at 11:07am, Zhen Lei wrote:
>>> ...... 
>>>
>>>>> Hi leizhen,
>>>>>
>>>>> I made change on reserve_crashkenrel(), inline comment may be slow.
>>>>> Please check and consider if they can be taken.
>>>>
>>>> That's great. Thank you very much.
>>>>
>>>>>
>>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>>> index 30ae6638ff54..f96351da1e3e 100644
>>>>> --- a/arch/arm64/mm/init.c
>>>>> +++ b/arch/arm64/mm/init.c
>>>>> @@ -109,38 +109,43 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>>>   * This function reserves memory area given in "crashkernel=" kernel command
>>>>>   * line parameter. The memory reserved is used by dump capture kernel when
>>>>>   * primary kernel is crashing.
>>>>> + *
>>>>> + * NOTE: Reservation of crashkernel,low is special since its existence
>>>>> + * is not independent, need rely on the existence of crashkernel,high.
>>>>> + * Hence there are different cases for crashkernel,low reservation:
>>>
>>> Considering to update the 3rd line as below:
>>>
>>>  * NOTE: Reservation of crashkernel,low is special since its existence
>>>  * is not independent, need rely on the existence of crashkernel,high.
>>>  * Here, four cases of crashkernel,low reservation are summarized: 
>>
>> OK. How about change "crashkernel,low" to "crashkernel low memory"?
>> "crashkernel=Y,low", "crashkernel=,low" and "crashkernel,low" are very similar,
>> may dazzle the reader.
> 
> Fine by me. 'crashkernel low memory' is formal, just make sentence a
> little longer. Please take what you think fitter.

OK, I will send v22 after v5.18-rc1.

> 
>>
>>>
>>>>> + * 1) crashkernel=Y,low is specified explicitly, crashkernel,low takes Y;
>>>>> + * 2) crashkernel=,low is not given, while crashkernel=,high is specified,
>>>>> + *    take the default crashkernel,low value;
>>>>> + * 3) crashkernel=X is specified, while fallback to get a memory region
>>>>> + *    in high memory, take the default crashkernel,low value;
>>>>> + * 4) crashkernel='invalid value',low is specified, failed the whole
>>>>> + *    crashkernel reservation and bail out.
>>>>>   */
>>>>>  static void __init reserve_crashkernel(void)
>>>>>  {
>>>>>  	unsigned long long crash_base, crash_size;
>>>>>  	unsigned long long crash_low_size;
>>>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>>>> -	int ret;
>>>>>  	bool fixed_base, high = false;
>>>>>  	char *cmdline = boot_command_line;
>>>>> +	int ret;
>>>>>  
>>>>>  	/* crashkernel=X[@offset] */
>>>>>  	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>>>>  				&crash_size, &crash_base);
>>>>>  	if (ret || !crash_size) {
>>>>> -		/* crashkernel=X,high */
>>>>>  		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>>>>>  		if (ret || !crash_size)
>>>>>  			return;
>>>>>  
>>>>> -		/* crashkernel=Y,low */
>>>>>  		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
>>>>>  		if (ret == -ENOENT)
>>>>> -			/*
>>>>> -			 * crashkernel=Y,low is not specified explicitly, use
>>>>> -			 * default size automatically.
>>>>> -			 */
>>>>> +			/* case #2 of crashkernel,low reservation */
>>>>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>>>  		else if (ret)
>>>>> -			/* crashkernel=Y,low is specified but Y is invalid */
>>>>> +			/* case #4 of crashkernel,low reservation */
>>>>>  			return;
>>>>>  
>>>>> -		/* Mark crashkernel=X,high is specified */
>>>>>  		high = true;
>>>>>  		crash_max = CRASH_ADDR_HIGH_MAX;
>>>>>  	}
>>>>> @@ -148,7 +153,6 @@ static void __init reserve_crashkernel(void)
>>>>>  	fixed_base = !!crash_base;
>>>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>>>  
>>>>> -	/* User specifies base address explicitly. */
>>>>>  	if (fixed_base)
>>>>>  		crash_max = crash_base + crash_size;
>>>>>  
>>>>> @@ -172,11 +176,7 @@ static void __init reserve_crashkernel(void)
>>>>>  	}
>>>>>  
>>>>>  	if (crash_base >= SZ_4G) {
>>>>> -		/*
>>>>> -		 * For case crashkernel=X, low memory is not enough and fall
>>>>> -		 * back to reserve specified size of memory above 4G, try to
>>>>> -		 * allocate minimum required memory below 4G again.
>>>>> -		 */
>>>>> +		/* case #3 of crashkernel,low reservation */
>>>>>  		if (!high)
>>>>>  			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>>>  
>>>>>
>>>>>>  
>>>>>> -	/* Current arm64 boot protocol requires 2MB alignment */
>>>>>> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
>>>>>> +retry:
>>>>>> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>>>>>  					       crash_base, crash_max);
>>>>>>  	if (!crash_base) {
>>>>>> +		/*
>>>>>> +		 * Attempt to fully allocate low memory failed, fall back
>>>>>> +		 * to high memory, the minimum required low memory will be
>>>>>> +		 * reserved later.
>>>>>> +		 */
>>>>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>>>>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>>>>>> +			goto retry;
>>>>>> +		}
>>>>>> +
>>>>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>>>>>  			crash_size);
>>>>>>  		return;
>>>>>>  	}
>>>>>>  
>>>>>> +	if (crash_base >= SZ_4G) {
>>>>>> +		/*
>>>>>> +		 * For case crashkernel=X, low memory is not enough and fall
>>>>>> +		 * back to reserve specified size of memory above 4G, try to
>>>>>> +		 * allocate minimum required memory below 4G again.
>>>>>> +		 */
>>>>>> +		if (!high)
>>>>>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>>>> +
>>>>>> +		if (reserve_crashkernel_low(crash_low_size)) {
>>>>>> +			memblock_phys_free(crash_base, crash_size);
>>>>>> +			return;
>>>>>> +		}
>>>>>> +	}
>>>>>> +
>>>>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>>>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>>>>>  
>>>>>> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>>>>>>  	 * map. Inform kmemleak so that it won't try to access it.
>>>>>>  	 */
>>>>>>  	kmemleak_ignore_phys(crash_base);
>>>>>> +	if (crashk_low_res.end)
>>>>>> +		kmemleak_ignore_phys(crashk_low_res.start);
>>>>>> +
>>>>>>  	crashk_res.start = crash_base;
>>>>>>  	crashk_res.end = crash_base + crash_size - 1;
>>>>>>  	insert_resource(&iomem_resource, &crashk_res);
>>>>>> -- 
>>>>>> 2.25.1
>>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>> -- 
>>>> Regards,
>>>>   Zhen Lei
>>>>
>>>
>>> .
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
>>
> 
> .
>
John Donnelly March 21, 2022, 1:29 p.m. UTC | #9
On 2/26/22 9:07 PM, Zhen Lei wrote:
> From: Chen Zhou <chenzhou10@huawei.com>
> 
> There are following issues in arm64 kdump:
> 1. We use crashkernel=X to reserve crashkernel below 4G, which
> will fail when there is no enough low memory.

                         " Not enough "
> 2. If reserving crashkernel above 4G, in this case, crash dump
> kernel will boot failure because there is no low memory available
> for allocation.

  We can't have a "boot failure". If the requested reservation
  cannot be met, the kdump configuration is not set up.
> 
> To solve these issues, change the behavior of crashkernel=X and
> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
> in DMA zone, and fall back to high allocation if it fails.
> We can also use "crashkernel=X,high" to select a region above DMA zone,
> which also tries to allocate at least 256M in DMA zone automatically.
> "crashkernel=Y,low" can be used to allocate specified size low memory.

Is there going to be documentation on which values should be used on
particular Arm platforms?

> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
> ---
>   arch/arm64/kernel/machine_kexec.c      |   9 ++-
>   arch/arm64/kernel/machine_kexec_file.c |  12 ++-
>   arch/arm64/mm/init.c                   | 106 +++++++++++++++++++++++--
>   3 files changed, 115 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> index e16b248699d5c3c..19c2d487cb08feb 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
>   
>   	/* in reserved memory? */
>   	addr = __pfn_to_phys(pfn);
> -	if ((addr < crashk_res.start) || (crashk_res.end < addr))
> -		return false;
> +	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
> +		if (!crashk_low_res.end)
> +			return false;
> +
> +		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
> +			return false;
> +	}
>   
>   	if (!kexec_crash_image)
>   		return true;
> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index 59c648d51848886..889951291cc0f9c 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
>   
>   	/* Exclude crashkernel region */
>   	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
> +	if (ret)
> +		goto out;
> +
> +	if (crashk_low_res.end) {
> +		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
> +		if (ret)
> +			goto out;
> +	}
>   
> -	if (!ret)
> -		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
> +	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
>   
> +out:
>   	kfree(cmem);
>   	return ret;
>   }
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 90f276d46b93bc6..30ae6638ff54c47 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr);
>   phys_addr_t arm64_dma_phys_limit __ro_after_init;
>   
>   #ifdef CONFIG_KEXEC_CORE
> +/* Current arm64 boot protocol requires 2MB alignment */
> +#define CRASH_ALIGN			SZ_2M
> +
> +#define CRASH_ADDR_LOW_MAX		arm64_dma_phys_limit
> +#define CRASH_ADDR_HIGH_MAX		memblock.current_limit
> +
> +/*
> + * This is an empirical value in x86_64 and taken here directly. Please
> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more
> + * details.
> + */
> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE	\
> +	max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20)
> +
> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> +{
> +	unsigned long long low_base;
> +
> +	/* passed with crashkernel=0,low ? */
> +	if (!low_size)
> +		return 0;
> +
> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> +	if (!low_base) {
> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> +		return -ENOMEM;
> +	}
> +
> +	pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
> +		low_base, low_base + low_size, low_size >> 20);
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +	insert_resource(&iomem_resource, &crashk_low_res);
> +
> +	return 0;
> +}
> +
>   /*
>    * reserve_crashkernel() - reserves memory for crash kernel
>    *
> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>   static void __init reserve_crashkernel(void)
>   {
>   	unsigned long long crash_base, crash_size;
> -	unsigned long long crash_max = arm64_dma_phys_limit;
> +	unsigned long long crash_low_size;
> +	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>   	int ret;
> +	bool fixed_base, high = false;
> +	char *cmdline = boot_command_line;
>   
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +	/* crashkernel=X[@offset] */
> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>   				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +
> +		/* crashkernel=Y,low */
> +		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
> +		if (ret == -ENOENT)
> +			/*
> +			 * crashkernel=Y,low is not specified explicitly, use
> +			 * default size automatically.
> +			 */
> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> +		else if (ret)
> +			/* crashkernel=Y,low is specified but Y is invalid */
> +			return;
> +
> +		/* Mark crashkernel=X,high is specified */
> +		high = true;
> +		crash_max = CRASH_ADDR_HIGH_MAX;
> +	}
>   
> +	fixed_base = !!crash_base;
>   	crash_size = PAGE_ALIGN(crash_size);
>   
>   	/* User specifies base address explicitly. */
> -	if (crash_base)
> +	if (fixed_base)
>   		crash_max = crash_base + crash_size;
>   
> -	/* Current arm64 boot protocol requires 2MB alignment */
> -	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
> +retry:
> +	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>   					       crash_base, crash_max);
>   	if (!crash_base) {
> +		/*
> +		 * Attempt to fully allocate low memory failed, fall back
> +		 * to high memory, the minimum required low memory will be
> +		 * reserved later.
> +		 */
> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> +			crash_max = CRASH_ADDR_HIGH_MAX;
> +			goto retry;
> +		}
> +
>   		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>   			crash_size);
>   		return;
>   	}
>   
> +	if (crash_base >= SZ_4G) {
> +		/*
> +		 * For case crashkernel=X, low memory is not enough and fall
> +		 * back to reserve specified size of memory above 4G, try to
> +		 * allocate minimum required memory below 4G again.
> +		 */
> +		if (!high)
> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> +
> +		if (reserve_crashkernel_low(crash_low_size)) {
> +			memblock_phys_free(crash_base, crash_size);
> +			return;
> +		}
> +	}
> +
>   	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>   		crash_base, crash_base + crash_size, crash_size >> 20);
>   
> @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>   	 * map. Inform kmemleak so that it won't try to access it.
>   	 */
>   	kmemleak_ignore_phys(crash_base);
> +	if (crashk_low_res.end)
> +		kmemleak_ignore_phys(crashk_low_res.start);
> +
>   	crashk_res.start = crash_base;
>   	crashk_res.end = crash_base + crash_size - 1;
>   	insert_resource(&iomem_resource, &crashk_res);
Dave Kleikamp March 21, 2022, 2:09 p.m. UTC | #10
On 3/21/22 8:29AM, John Donnelly wrote:
> On 2/26/22 9:07 PM, Zhen Lei wrote:
>> From: Chen Zhou <chenzhou10@huawei.com>
>>
>> There are following issues in arm64 kdump:
>> 1. We use crashkernel=X to reserve crashkernel below 4G, which
>> will fail when there is no enough low memory.
> 
>                          " Not enough "
>> 2. If reserving crashkernel above 4G, in this case, crash dump
>> kernel will boot failure because there is no low memory available
>> for allocation.
> 
>   We can't have a "boot failure". If the requested reservation
>   can not be met,  the kdump  configuration is not setup.

I think you misread this. Without these patches, if only high memory is 
reserved for the crash kernel, then the crash kernel will fail to boot.

>>
>> To solve these issues, change the behavior of crashkernel=X and
>> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
>> in DMA zone, and fall back to high allocation if it fails.
>> We can also use "crashkernel=X,high" to select a region above DMA zone,
>> which also tries to allocate at least 256M in DMA zone automatically.
>> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Is there going to be documentation on what values certain Arm platforms 
> are going to use this on ?
> 
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Leizhen (ThunderTown) March 22, 2022, 1:58 a.m. UTC | #11
On 2022/3/21 21:29, John Donnelly wrote:
> On 2/26/22 9:07 PM, Zhen Lei wrote:
>> From: Chen Zhou <chenzhou10@huawei.com>
>>
>> There are following issues in arm64 kdump:
>> 1. We use crashkernel=X to reserve crashkernel below 4G, which
>> will fail when there is no enough low memory.
> 
>                         " Not enough "

OK, thanks

>> 2. If reserving crashkernel above 4G, in this case, crash dump
>> kernel will boot failure because there is no low memory available
>> for allocation.
> 
>  We can't have a "boot failure". If the requested reservation
>  can not be met,  the kdump  configuration is not setup.
>>
>> To solve these issues, change the behavior of crashkernel=X and
>> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
>> in DMA zone, and fall back to high allocation if it fails.
>> We can also use "crashkernel=X,high" to select a region above DMA zone,
>> which also tries to allocate at least 256M in DMA zone automatically.
>> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Is there going to be documentation on what values certain Arm platforms are going to use this on ?

There is no exact formula.

> 
>>
>> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
>> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
>> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
>> ---
>>   arch/arm64/kernel/machine_kexec.c      |   9 ++-
>>   arch/arm64/kernel/machine_kexec_file.c |  12 ++-
>>   arch/arm64/mm/init.c                   | 106 +++++++++++++++++++++++--
>>   3 files changed, 115 insertions(+), 12 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
>> index e16b248699d5c3c..19c2d487cb08feb 100644
>> --- a/arch/arm64/kernel/machine_kexec.c
>> +++ b/arch/arm64/kernel/machine_kexec.c
>> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
>>         /* in reserved memory? */
>>       addr = __pfn_to_phys(pfn);
>> -    if ((addr < crashk_res.start) || (crashk_res.end < addr))
>> -        return false;
>> +    if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
>> +        if (!crashk_low_res.end)
>> +            return false;
>> +
>> +        if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
>> +            return false;
>> +    }
>>         if (!kexec_crash_image)
>>           return true;
>> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
>> index 59c648d51848886..889951291cc0f9c 100644
>> --- a/arch/arm64/kernel/machine_kexec_file.c
>> +++ b/arch/arm64/kernel/machine_kexec_file.c
>> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
>>         /* Exclude crashkernel region */
>>       ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
>> +    if (ret)
>> +        goto out;
>> +
>> +    if (crashk_low_res.end) {
>> +        ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
>> +        if (ret)
>> +            goto out;
>> +    }
>>   -    if (!ret)
>> -        ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
>> +    ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
>>   +out:
>>       kfree(cmem);
>>       return ret;
>>   }
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 90f276d46b93bc6..30ae6638ff54c47 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -65,6 +65,44 @@ EXPORT_SYMBOL(memstart_addr);
>>   phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>     #ifdef CONFIG_KEXEC_CORE
>> +/* Current arm64 boot protocol requires 2MB alignment */
>> +#define CRASH_ALIGN            SZ_2M
>> +
>> +#define CRASH_ADDR_LOW_MAX        arm64_dma_phys_limit
>> +#define CRASH_ADDR_HIGH_MAX        memblock.current_limit
>> +
>> +/*
>> + * This is an empirical value in x86_64 and taken here directly. Please
>> + * refer to the code comment in reserve_crashkernel_low() of x86_64 for more
>> + * details.
>> + */
>> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE    \
>> +    max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20)
>> +
>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>> +{
>> +    unsigned long long low_base;
>> +
>> +    /* passed with crashkernel=0,low ? */
>> +    if (!low_size)
>> +        return 0;
>> +
>> +    low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>> +    if (!low_base) {
>> +        pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
>> +        low_base, low_base + low_size, low_size >> 20);
>> +
>> +    crashk_low_res.start = low_base;
>> +    crashk_low_res.end   = low_base + low_size - 1;
>> +    insert_resource(&iomem_resource, &crashk_low_res);
>> +
>> +    return 0;
>> +}
>> +
>>   /*
>>    * reserve_crashkernel() - reserves memory for crash kernel
>>    *
>> @@ -75,30 +113,79 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>   static void __init reserve_crashkernel(void)
>>   {
>>       unsigned long long crash_base, crash_size;
>> -    unsigned long long crash_max = arm64_dma_phys_limit;
>> +    unsigned long long crash_low_size;
>> +    unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>       int ret;
>> +    bool fixed_base, high = false;
>> +    char *cmdline = boot_command_line;
>>   -    ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>> +    /* crashkernel=X[@offset] */
>> +    ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>                   &crash_size, &crash_base);
>> -    /* no crashkernel= or invalid value specified */
>> -    if (ret || !crash_size)
>> -        return;
>> +    if (ret || !crash_size) {
>> +        /* crashkernel=X,high */
>> +        ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>> +        if (ret || !crash_size)
>> +            return;
>> +
>> +        /* crashkernel=Y,low */
>> +        ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
>> +        if (ret == -ENOENT)
>> +            /*
>> +             * crashkernel=Y,low is not specified explicitly, use
>> +             * default size automatically.
>> +             */
>> +            crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>> +        else if (ret)
>> +            /* crashkernel=Y,low is specified but Y is invalid */
>> +            return;
>> +
>> +        /* Mark crashkernel=X,high is specified */
>> +        high = true;
>> +        crash_max = CRASH_ADDR_HIGH_MAX;
>> +    }
>>   +    fixed_base = !!crash_base;
>>       crash_size = PAGE_ALIGN(crash_size);
>>         /* User specifies base address explicitly. */
>> -    if (crash_base)
>> +    if (fixed_base)
>>           crash_max = crash_base + crash_size;
>>   -    /* Current arm64 boot protocol requires 2MB alignment */
>> -    crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
>> +retry:
>> +    crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>                              crash_base, crash_max);
>>       if (!crash_base) {
>> +        /*
>> +         * Attempt to fully allocate low memory failed, fall back
>> +         * to high memory, the minimum required low memory will be
>> +         * reserved later.
>> +         */
>> +        if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>> +            crash_max = CRASH_ADDR_HIGH_MAX;
>> +            goto retry;
>> +        }
>> +
>>           pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>               crash_size);
>>           return;
>>       }
>>   +    if (crash_base >= SZ_4G) {
>> +        /*
>> +         * For case crashkernel=X, low memory is not enough and fall
>> +         * back to reserve specified size of memory above 4G, try to
>> +         * allocate minimum required memory below 4G again.
>> +         */
>> +        if (!high)
>> +            crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>> +
>> +        if (reserve_crashkernel_low(crash_low_size)) {
>> +            memblock_phys_free(crash_base, crash_size);
>> +            return;
>> +        }
>> +    }
>> +
>>       pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>           crash_base, crash_base + crash_size, crash_size >> 20);
>>   @@ -107,6 +194,9 @@ static void __init reserve_crashkernel(void)
>>        * map. Inform kmemleak so that it won't try to access it.
>>        */
>>       kmemleak_ignore_phys(crash_base);
>> +    if (crashk_low_res.end)
>> +        kmemleak_ignore_phys(crashk_low_res.start);
>> +
>>       crashk_res.start = crash_base;
>>       crashk_res.end = crash_base + crash_size - 1;
>>       insert_resource(&iomem_resource, &crashk_res);
> 
> .
>
diff mbox series

Patch

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index e16b248699d5c3c..19c2d487cb08feb 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -329,8 +329,13 @@  bool crash_is_nosave(unsigned long pfn)
 
 	/* in reserved memory? */
 	addr = __pfn_to_phys(pfn);
-	if ((addr < crashk_res.start) || (crashk_res.end < addr))
-		return false;
+	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
+		if (!crashk_low_res.end)
+			return false;
+
+		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
+			return false;
+	}
 
 	if (!kexec_crash_image)
 		return true;
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 59c648d51848886..889951291cc0f9c 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -65,10 +65,18 @@  static int prepare_elf_headers(void **addr, unsigned long *sz)
 
 	/* Exclude crashkernel region */
 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		goto out;
+
+	if (crashk_low_res.end) {
+		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		if (ret)
+			goto out;
+	}
 
-	if (!ret)
-		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
+	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
 
+out:
 	kfree(cmem);
 	return ret;
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 90f276d46b93bc6..30ae6638ff54c47 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -65,6 +65,44 @@  EXPORT_SYMBOL(memstart_addr);
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN			SZ_2M
+
+#define CRASH_ADDR_LOW_MAX		arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX		memblock.current_limit
+
+/*
+ * This is an empirical value taken directly from x86_64. Please refer
+ * to the code comment in reserve_crashkernel_low() of x86_64 for more
+ * details.
+ */
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE	\
+	max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20)
+
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+	unsigned long long low_base;
+
+	/* passed with crashkernel=0,low ? */
+	if (!low_size)
+		return 0;
+
+	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+	if (!low_base) {
+		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+		return -ENOMEM;
+	}
+
+	pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+		low_base, low_base + low_size, low_size >> 20);
+
+	crashk_low_res.start = low_base;
+	crashk_low_res.end   = low_base + low_size - 1;
+	insert_resource(&iomem_resource, &crashk_low_res);
+
+	return 0;
+}
+
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -75,30 +113,79 @@  phys_addr_t arm64_dma_phys_limit __ro_after_init;
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
-	unsigned long long crash_max = arm64_dma_phys_limit;
+	unsigned long long crash_low_size;
+	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
 	int ret;
+	bool fixed_base, high = false;
+	char *cmdline = boot_command_line;
 
-	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+	/* crashkernel=X[@offset] */
+	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
 				&crash_size, &crash_base);
-	/* no crashkernel= or invalid value specified */
-	if (ret || !crash_size)
-		return;
+	if (ret || !crash_size) {
+		/* crashkernel=X,high */
+		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
+		if (ret || !crash_size)
+			return;
+
+		/* crashkernel=Y,low */
+		ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
+		if (ret == -ENOENT)
+			/*
+			 * crashkernel=Y,low is not specified explicitly, use
+			 * default size automatically.
+			 */
+			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+		else if (ret)
+			/* crashkernel=Y,low is specified but Y is invalid */
+			return;
+
+		/* Mark crashkernel=X,high is specified */
+		high = true;
+		crash_max = CRASH_ADDR_HIGH_MAX;
+	}
 
+	fixed_base = !!crash_base;
 	crash_size = PAGE_ALIGN(crash_size);
 
 	/* User specifies base address explicitly. */
-	if (crash_base)
+	if (fixed_base)
 		crash_max = crash_base + crash_size;
 
-	/* Current arm64 boot protocol requires 2MB alignment */
-	crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
+retry:
+	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
 					       crash_base, crash_max);
 	if (!crash_base) {
+		/*
+		 * The attempt to allocate entirely from low memory failed.
+		 * Fall back to high memory; the minimum required low memory
+		 * will be reserved later.
+		 */
+		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
+			crash_max = CRASH_ADDR_HIGH_MAX;
+			goto retry;
+		}
+
 		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 			crash_size);
 		return;
 	}
 
+	if (crash_base >= SZ_4G) {
+		/*
+		 * For crashkernel=X, low memory was insufficient and the
+		 * requested size was reserved above 4G instead, so also try
+		 * to reserve the minimum required memory below 4G.
+		 */
+		if (!high)
+			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+
+		if (reserve_crashkernel_low(crash_low_size)) {
+			memblock_phys_free(crash_base, crash_size);
+			return;
+		}
+	}
+
 	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 		crash_base, crash_base + crash_size, crash_size >> 20);
 
@@ -107,6 +194,9 @@  static void __init reserve_crashkernel(void)
 	 * map. Inform kmemleak so that it won't try to access it.
 	 */
 	kmemleak_ignore_phys(crash_base);
+	if (crashk_low_res.end)
+		kmemleak_ignore_phys(crashk_low_res.start);
+
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);