diff mbox series

[v20,3/5] arm64: kdump: reimplement crashkernel=X

Message ID 20220124084708.683-4-thunder.leizhen@huawei.com (mailing list archive)
State New, archived
Headers show
Series support reserving crashkernel above 4G on arm64 kdump | expand

Commit Message

Leizhen (ThunderTown) Jan. 24, 2022, 8:47 a.m. UTC
From: Chen Zhou <chenzhou10@huawei.com>

There are the following issues in arm64 kdump:
1. We use crashkernel=X to reserve crashkernel below 4G, which
will fail when there is not enough low memory.
2. If the crashkernel is reserved above 4G, the crash dump
kernel will fail to boot because there is no low memory available
for allocation.

To solve these issues, change the behavior of crashkernel=X and
introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
in the DMA zone, and falls back to high allocation if that fails.
We can also use "crashkernel=X,high" to select a region above the DMA
zone, which also tries to allocate at least 256M in the DMA zone
automatically. "crashkernel=Y,low" can be used to allocate a specified
size of low memory.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
 arch/arm64/kernel/machine_kexec.c      |  9 +++-
 arch/arm64/kernel/machine_kexec_file.c | 12 ++++-
 arch/arm64/mm/init.c                   | 68 ++++++++++++++++++++++++--
 3 files changed, 81 insertions(+), 8 deletions(-)

Comments

John Donnelly Jan. 26, 2022, 3:18 p.m. UTC | #1
On 1/24/22 2:47 AM, Zhen Lei wrote:
> From: Chen Zhou <chenzhou10@huawei.com>
> 
> There are following issues in arm64 kdump:
> 1. We use crashkernel=X to reserve crashkernel below 4G, which
> will fail when there is no enough low memory.
> 2. If reserving crashkernel above 4G, in this case, crash dump
> kernel will boot failure because there is no low memory available
> for allocation.
> 
> To solve these issues, change the behavior of crashkernel=X and
> introduce crashkernel=X,[high,low]. crashkernel=X tries low allocation
> in DMA zone, and fall back to high allocation if it fails.
> We can also use "crashkernel=X,high" to select a region above DMA zone,
> which also tries to allocate at least 256M in DMA zone automatically.
> "crashkernel=Y,low" can be used to allocate specified size low memory.
> 
> Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
> Co-developed-by: Zhen Lei <thunder.leizhen@huawei.com>
> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>


Acked-by: John Donnelly  <john.p.donnelly@oracle.com>

> ---
>   arch/arm64/kernel/machine_kexec.c      |  9 +++-
>   arch/arm64/kernel/machine_kexec_file.c | 12 ++++-
>   arch/arm64/mm/init.c                   | 68 ++++++++++++++++++++++++--
>   3 files changed, 81 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> index e16b248699d5c3c..19c2d487cb08feb 100644
> --- a/arch/arm64/kernel/machine_kexec.c
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -329,8 +329,13 @@ bool crash_is_nosave(unsigned long pfn)
>   
>   	/* in reserved memory? */
>   	addr = __pfn_to_phys(pfn);
> -	if ((addr < crashk_res.start) || (crashk_res.end < addr))
> -		return false;
> +	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
> +		if (!crashk_low_res.end)
> +			return false;
> +
> +		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
> +			return false;
> +	}
>   
>   	if (!kexec_crash_image)
>   		return true;
> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index 59c648d51848886..889951291cc0f9c 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -65,10 +65,18 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
>   
>   	/* Exclude crashkernel region */
>   	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
> +	if (ret)
> +		goto out;
> +
> +	if (crashk_low_res.end) {
> +		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
> +		if (ret)
> +			goto out;
> +	}
>   
> -	if (!ret)
> -		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
> +	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
>   
> +out:
>   	kfree(cmem);
>   	return ret;
>   }
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>   #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>   #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>   
> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> +{
> +	unsigned long long low_base;
> +
> +	/* passed with crashkernel=0,low ? */
> +	if (!low_size)
> +		return 0;
> +
> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> +	if (!low_base) {
> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> +		return -ENOMEM;
> +	}
> +
> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> +		low_base, low_base + low_size, low_size >> 20);
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +	insert_resource(&iomem_resource, &crashk_low_res);
> +
> +	return 0;
> +}
> +
>   /*
>    * reserve_crashkernel() - reserves memory for crash kernel
>    *
> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>   static void __init reserve_crashkernel(void)
>   {
>   	unsigned long long crash_base, crash_size;
> +	unsigned long long crash_low_size = SZ_256M;
>   	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>   	int ret;
> +	bool fixed_base;
> +	char *cmdline = boot_command_line;
>   
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +	/* crashkernel=X[@offset] */
> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>   				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		unsigned long long low_size;
>   
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +
> +		/* crashkernel=X,low */
> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> +		if (!ret)
> +			crash_low_size = low_size;
> +
> +		crash_max = CRASH_ADDR_HIGH_MAX;
> +	}
> +
> +	fixed_base = !!crash_base;
>   	crash_size = PAGE_ALIGN(crash_size);
>   
>   	/* User specifies base address explicitly. */
>   	if (crash_base)
>   		crash_max = crash_base + crash_size;
>   
> +retry:
>   	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>   					       crash_base, crash_max);
>   	if (!crash_base) {
> +		/*
> +		 * Attempt to fully allocate low memory failed, fall back
> +		 * to high memory, the minimum required low memory will be
> +		 * reserved later.
> +		 */
> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> +			crash_max = CRASH_ADDR_HIGH_MAX;
> +			goto retry;
> +		}
> +
>   		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>   			crash_size);
>   		return;
>   	}
>   
> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> +		memblock_phys_free(crash_base, crash_size);
> +		return;
> +	}
> +
>   	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>   		crash_base, crash_base + crash_size, crash_size >> 20);
>   
> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>   	 * map. Inform kmemleak so that it won't try to access it.
>   	 */
>   	kmemleak_ignore_phys(crash_base);
> +	if (crashk_low_res.end)
> +		kmemleak_ignore_phys(crashk_low_res.start);
> +
>   	crashk_res.start = crash_base;
>   	crashk_res.end = crash_base + crash_size - 1;
>   	insert_resource(&iomem_resource, &crashk_res);
Baoquan He Feb. 11, 2022, 10:30 a.m. UTC | #2
On 01/24/22 at 04:47pm, Zhen Lei wrote:
> From: Chen Zhou <chenzhou10@huawei.com>
......
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>  
> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> +{
> +	unsigned long long low_base;
> +
> +	/* passed with crashkernel=0,low ? */
> +	if (!low_size)
> +		return 0;
> +
> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> +	if (!low_base) {
> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> +		return -ENOMEM;
> +	}
> +
> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> +		low_base, low_base + low_size, low_size >> 20);
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +	insert_resource(&iomem_resource, &crashk_low_res);
> +
> +	return 0;
> +}
> +
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
>   *
> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
> +	unsigned long long crash_low_size = SZ_256M;
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	int ret;
> +	bool fixed_base;
> +	char *cmdline = boot_command_line;
>  
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +	/* crashkernel=X[@offset] */
> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>  				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		unsigned long long low_size;
>  
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +
> +		/* crashkernel=X,low */
> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> +		if (!ret)
> +			crash_low_size = low_size;

Here, the error case is not checked and handled. But it still gets the
expected result, which is the default SZ_256M. Is this designed on
purpose?

> +
> +		crash_max = CRASH_ADDR_HIGH_MAX;
> +	}
> +
> +	fixed_base = !!crash_base;
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	/* User specifies base address explicitly. */
>  	if (crash_base)
>  		crash_max = crash_base + crash_size;
>  
> +retry:
>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>  					       crash_base, crash_max);
>  	if (!crash_base) {
> +		/*
> +		 * Attempt to fully allocate low memory failed, fall back
> +		 * to high memory, the minimum required low memory will be
> +		 * reserved later.
> +		 */
> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> +			crash_max = CRASH_ADDR_HIGH_MAX;
> +			goto retry;
> +		}
> +
>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>  			crash_size);
>  		return;
>  	}
>  
> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> +		memblock_phys_free(crash_base, crash_size);
> +		return;
> +	}
> +
>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>  		crash_base, crash_base + crash_size, crash_size >> 20);
>  
> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>  	 * map. Inform kmemleak so that it won't try to access it.
>  	 */
>  	kmemleak_ignore_phys(crash_base);
> +	if (crashk_low_res.end)
> +		kmemleak_ignore_phys(crashk_low_res.start);
> +
>  	crashk_res.start = crash_base;
>  	crashk_res.end = crash_base + crash_size - 1;
>  	insert_resource(&iomem_resource, &crashk_res);
> -- 
> 2.25.1
>
Leizhen (ThunderTown) Feb. 11, 2022, 10:41 a.m. UTC | #3
On 2022/2/11 18:30, Baoquan He wrote:
> On 01/24/22 at 04:47pm, Zhen Lei wrote:
>> From: Chen Zhou <chenzhou10@huawei.com>
> ......
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>  
>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>> +{
>> +	unsigned long long low_base;
>> +
>> +	/* passed with crashkernel=0,low ? */
>> +	if (!low_size)
>> +		return 0;
>> +
>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>> +	if (!low_base) {
>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
>> +		low_base, low_base + low_size, low_size >> 20);
>> +
>> +	crashk_low_res.start = low_base;
>> +	crashk_low_res.end   = low_base + low_size - 1;
>> +	insert_resource(&iomem_resource, &crashk_low_res);
>> +
>> +	return 0;
>> +}
>> +
>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
>>   *
>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  static void __init reserve_crashkernel(void)
>>  {
>>  	unsigned long long crash_base, crash_size;
>> +	unsigned long long crash_low_size = SZ_256M;
>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>  	int ret;
>> +	bool fixed_base;
>> +	char *cmdline = boot_command_line;
>>  
>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>> +	/* crashkernel=X[@offset] */
>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>  				&crash_size, &crash_base);
>> -	/* no crashkernel= or invalid value specified */
>> -	if (ret || !crash_size)
>> -		return;
>> +	if (ret || !crash_size) {
>> +		unsigned long long low_size;
>>  
>> +		/* crashkernel=X,high */
>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>> +		if (ret || !crash_size)
>> +			return;
>> +
>> +		/* crashkernel=X,low */
>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>> +		if (!ret)
>> +			crash_low_size = low_size;
> 
> Here, the error case is not checked and handled. But it still gets
> expeced result which is the default SZ_256M. Is this designed on
> purpose?

Yes, we can specify only "crashkernel=X,high".

This is mentioned in Documentation/admin-guide/kernel-parameters.txt

        crashkernel=size[KMG],low
                        [KNL, X86-64] range under 4G. When crashkernel=X,high
                        is passed, kernel could allocate physical memory region
                        above 4G, that cause second kernel crash on system
                        that require some amount of low memory, e.g. swiotlb
                        requires at least 64M+32K low memory, also enough extra
                        low memory is needed to make sure DMA buffers for 32-bit
                        devices won't run out. Kernel would try to allocate at     <---------
                        least 256M below 4G automatically.                         <---------

> 
>> +
>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>> +	}
>> +
>> +	fixed_base = !!crash_base;
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	/* User specifies base address explicitly. */
>>  	if (crash_base)
>>  		crash_max = crash_base + crash_size;
>>  
>> +retry:
>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>  					       crash_base, crash_max);
>>  	if (!crash_base) {
>> +		/*
>> +		 * Attempt to fully allocate low memory failed, fall back
>> +		 * to high memory, the minimum required low memory will be
>> +		 * reserved later.
>> +		 */
>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>> +			goto retry;
>> +		}
>> +
>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>  			crash_size);
>>  		return;
>>  	}
>>  
>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>> +		memblock_phys_free(crash_base, crash_size);
>> +		return;
>> +	}
>> +
>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>  
>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>>  	 * map. Inform kmemleak so that it won't try to access it.
>>  	 */
>>  	kmemleak_ignore_phys(crash_base);
>> +	if (crashk_low_res.end)
>> +		kmemleak_ignore_phys(crashk_low_res.start);
>> +
>>  	crashk_res.start = crash_base;
>>  	crashk_res.end = crash_base + crash_size - 1;
>>  	insert_resource(&iomem_resource, &crashk_res);
>> -- 
>> 2.25.1
>>
> 
> .
>
Baoquan He Feb. 11, 2022, 10:51 a.m. UTC | #4
On 02/11/22 at 06:41pm, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/2/11 18:30, Baoquan He wrote:
> > On 01/24/22 at 04:47pm, Zhen Lei wrote:
> >> From: Chen Zhou <chenzhou10@huawei.com>
> > ......
> >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> >> --- a/arch/arm64/mm/init.c
> >> +++ b/arch/arm64/mm/init.c
> >> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
> >>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
> >>  
> >> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> >> +{
> >> +	unsigned long long low_base;
> >> +
> >> +	/* passed with crashkernel=0,low ? */
> >> +	if (!low_size)
> >> +		return 0;
> >> +
> >> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> >> +	if (!low_base) {
> >> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> >> +		return -ENOMEM;
> >> +	}
> >> +
> >> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> >> +		low_base, low_base + low_size, low_size >> 20);
> >> +
> >> +	crashk_low_res.start = low_base;
> >> +	crashk_low_res.end   = low_base + low_size - 1;
> >> +	insert_resource(&iomem_resource, &crashk_low_res);
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>  /*
> >>   * reserve_crashkernel() - reserves memory for crash kernel
> >>   *
> >> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>  static void __init reserve_crashkernel(void)
> >>  {
> >>  	unsigned long long crash_base, crash_size;
> >> +	unsigned long long crash_low_size = SZ_256M;
> >>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> >>  	int ret;
> >> +	bool fixed_base;
> >> +	char *cmdline = boot_command_line;
> >>  
> >> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> >> +	/* crashkernel=X[@offset] */
> >> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
> >>  				&crash_size, &crash_base);
> >> -	/* no crashkernel= or invalid value specified */
> >> -	if (ret || !crash_size)
> >> -		return;
> >> +	if (ret || !crash_size) {
> >> +		unsigned long long low_size;
> >>  
> >> +		/* crashkernel=X,high */
> >> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> >> +		if (ret || !crash_size)
> >> +			return;
> >> +
> >> +		/* crashkernel=X,low */
> >> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> >> +		if (!ret)
> >> +			crash_low_size = low_size;
> > 
> > Here, the error case is not checked and handled. But it still gets
> > expeced result which is the default SZ_256M. Is this designed on
> > purpose?
> 
> Yes, we can specify only "crashkernel=X,high".
> 
> This is mentioned in Documentation/admin-guide/kernel-parameters.txt
> 
>         crashkernel=size[KMG],low
>                         [KNL, X86-64] range under 4G. When crashkernel=X,high
>                         is passed, kernel could allocate physical memory region
>                         above 4G, that cause second kernel crash on system
>                         that require some amount of low memory, e.g. swiotlb
>                         requires at least 64M+32K low memory, also enough extra
>                         low memory is needed to make sure DMA buffers for 32-bit
>                         devices won't run out. Kernel would try to allocate at     <---------
>                         least 256M below 4G automatically.                         <---------

Yeah, that is expected because omitting crashkernel=,low is a valid
usage. The 'ret' is 0 in that case. If I give the string below, it works too.
"crashkernel=256M,high crashkernel=aaabbadfadfd,low"

> 
> > 
> >> +
> >> +		crash_max = CRASH_ADDR_HIGH_MAX;
> >> +	}
> >> +
> >> +	fixed_base = !!crash_base;
> >>  	crash_size = PAGE_ALIGN(crash_size);
> >>  
> >>  	/* User specifies base address explicitly. */
> >>  	if (crash_base)
> >>  		crash_max = crash_base + crash_size;
> >>  
> >> +retry:
> >>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> >>  					       crash_base, crash_max);
> >>  	if (!crash_base) {
> >> +		/*
> >> +		 * Attempt to fully allocate low memory failed, fall back
> >> +		 * to high memory, the minimum required low memory will be
> >> +		 * reserved later.
> >> +		 */
> >> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> >> +			crash_max = CRASH_ADDR_HIGH_MAX;
> >> +			goto retry;
> >> +		}
> >> +
> >>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> >>  			crash_size);
> >>  		return;
> >>  	}
> >>  
> >> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> >> +		memblock_phys_free(crash_base, crash_size);
> >> +		return;
> >> +	}
> >> +
> >>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> >>  		crash_base, crash_base + crash_size, crash_size >> 20);
> >>  
> >> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
> >>  	 * map. Inform kmemleak so that it won't try to access it.
> >>  	 */
> >>  	kmemleak_ignore_phys(crash_base);
> >> +	if (crashk_low_res.end)
> >> +		kmemleak_ignore_phys(crashk_low_res.start);
> >> +
> >>  	crashk_res.start = crash_base;
> >>  	crashk_res.end = crash_base + crash_size - 1;
> >>  	insert_resource(&iomem_resource, &crashk_res);
> >> -- 
> >> 2.25.1
> >>
> > 
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei
>
Baoquan He Feb. 14, 2022, 3:52 a.m. UTC | #5
On 01/24/22 at 04:47pm, Zhen Lei wrote:
......
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>  
> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> +{
> +	unsigned long long low_base;
> +
> +	/* passed with crashkernel=0,low ? */
> +	if (!low_size)
> +		return 0;
> +
> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> +	if (!low_base) {
> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> +		return -ENOMEM;
> +	}
> +
> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> +		low_base, low_base + low_size, low_size >> 20);
> +
> +	crashk_low_res.start = low_base;
> +	crashk_low_res.end   = low_base + low_size - 1;
> +	insert_resource(&iomem_resource, &crashk_low_res);
> +
> +	return 0;
> +}
> +
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel

My other concern is the crashkernel=,low handling. In this patch, the
code related to low memory is obscure. I wonder if we should make it
explicit with slightly redundant but very clear code flows. I say this
because, while the code must be very clear to you and the reviewers, it
may be harder for later code readers or anyone interested to understand.

1) crashkernel=X,high
2) crashkernel=X,high crashkernel=Y,low
3) crashkernel=X,high crashkernel=0,low
4) crashkernel=X,high crashkernel='messy code',low
5) crashkernel=X //fall back to high memory, low memory is required then.

It could be me overthinking it. I made changes to your patch
with some tuning; not sure if that's OK with you. Otherwise, this patchset
works very well for all the above test cases; it's ripe to be merged for
wider testing.

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a5d43feac0d7..671862c56d7d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -94,7 +94,8 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
 
 	return 0;
 }
-
+/*Words explaining why it's 256M*/
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -105,10 +106,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
-	unsigned long long crash_low_size = SZ_256M;
+	unsigned long long crash_low_size;
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
 	int ret;
-	bool fixed_base;
+	bool fixed_base, high;
 	char *cmdline = boot_command_line;
 
 	/* crashkernel=X[@offset] */
@@ -126,7 +127,10 @@ static void __init reserve_crashkernel(void)
 		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
 		if (!ret)
 			crash_low_size = low_size;
+		else
+			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
 
+		high = true;
 		crash_max = CRASH_ADDR_HIGH_MAX;
 	}
 
@@ -134,7 +138,7 @@ static void __init reserve_crashkernel(void)
 	crash_size = PAGE_ALIGN(crash_size);
 
 	/* User specifies base address explicitly. */
-	if (crash_base)
+	if (fixed_base)
 		crash_max = crash_base + crash_size;
 
 retry:
@@ -156,7 +160,10 @@ static void __init reserve_crashkernel(void)
 		return;
 	}
 
-	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
+	if (crash_base >= SZ_4G && !high) 
+		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+
+	if (reserve_crashkernel_low(crash_low_size)) {
 		memblock_phys_free(crash_base, crash_size);
 		return;
 	}

>   *
> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
> +	unsigned long long crash_low_size = SZ_256M;
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	int ret;
> +	bool fixed_base;
> +	char *cmdline = boot_command_line;
>  
> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> +	/* crashkernel=X[@offset] */
> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>  				&crash_size, &crash_base);
> -	/* no crashkernel= or invalid value specified */
> -	if (ret || !crash_size)
> -		return;
> +	if (ret || !crash_size) {
> +		unsigned long long low_size;
>  
> +		/* crashkernel=X,high */
> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> +		if (ret || !crash_size)
> +			return;
> +
> +		/* crashkernel=X,low */
> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> +		if (!ret)
> +			crash_low_size = low_size;
> +
> +		crash_max = CRASH_ADDR_HIGH_MAX;
> +	}
> +
> +	fixed_base = !!crash_base;
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	/* User specifies base address explicitly. */
>  	if (crash_base)
>  		crash_max = crash_base + crash_size;
>  
> +retry:
>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>  					       crash_base, crash_max);
>  	if (!crash_base) {
> +		/*
> +		 * Attempt to fully allocate low memory failed, fall back
> +		 * to high memory, the minimum required low memory will be
> +		 * reserved later.
> +		 */
> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> +			crash_max = CRASH_ADDR_HIGH_MAX;
> +			goto retry;
> +		}
> +
>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>  			crash_size);
>  		return;
>  	}
>  
> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> +		memblock_phys_free(crash_base, crash_size);
> +		return;
> +	}
> +
>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>  		crash_base, crash_base + crash_size, crash_size >> 20);
>  
> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>  	 * map. Inform kmemleak so that it won't try to access it.
>  	 */
>  	kmemleak_ignore_phys(crash_base);
> +	if (crashk_low_res.end)
> +		kmemleak_ignore_phys(crashk_low_res.start);
> +
>  	crashk_res.start = crash_base;
>  	crashk_res.end = crash_base + crash_size - 1;
>  	insert_resource(&iomem_resource, &crashk_res);
> -- 
> 2.25.1
>
Leizhen (ThunderTown) Feb. 14, 2022, 6:44 a.m. UTC | #6
On 2022/2/11 18:51, Baoquan He wrote:
> On 02/11/22 at 06:41pm, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2022/2/11 18:30, Baoquan He wrote:
>>> On 01/24/22 at 04:47pm, Zhen Lei wrote:
>>>> From: Chen Zhou <chenzhou10@huawei.com>
>>> ......
>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
>>>> --- a/arch/arm64/mm/init.c
>>>> +++ b/arch/arm64/mm/init.c
>>>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>>>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>>>  
>>>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>> +{
>>>> +	unsigned long long low_base;
>>>> +
>>>> +	/* passed with crashkernel=0,low ? */
>>>> +	if (!low_size)
>>>> +		return 0;
>>>> +
>>>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>>>> +	if (!low_base) {
>>>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>>>> +		return -ENOMEM;
>>>> +	}
>>>> +
>>>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
>>>> +		low_base, low_base + low_size, low_size >> 20);
>>>> +
>>>> +	crashk_low_res.start = low_base;
>>>> +	crashk_low_res.end   = low_base + low_size - 1;
>>>> +	insert_resource(&iomem_resource, &crashk_low_res);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>>  /*
>>>>   * reserve_crashkernel() - reserves memory for crash kernel
>>>>   *
>>>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>  static void __init reserve_crashkernel(void)
>>>>  {
>>>>  	unsigned long long crash_base, crash_size;
>>>> +	unsigned long long crash_low_size = SZ_256M;
>>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>>>  	int ret;
>>>> +	bool fixed_base;
>>>> +	char *cmdline = boot_command_line;
>>>>  
>>>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>>>> +	/* crashkernel=X[@offset] */
>>>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>>>  				&crash_size, &crash_base);
>>>> -	/* no crashkernel= or invalid value specified */
>>>> -	if (ret || !crash_size)
>>>> -		return;
>>>> +	if (ret || !crash_size) {
>>>> +		unsigned long long low_size;
>>>>  
>>>> +		/* crashkernel=X,high */
>>>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>>>> +		if (ret || !crash_size)
>>>> +			return;
>>>> +
>>>> +		/* crashkernel=X,low */
>>>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>>>> +		if (!ret)
>>>> +			crash_low_size = low_size;
>>>
>>> Here, the error case is not checked and handled. But it still gets
>>> expeced result which is the default SZ_256M. Is this designed on
>>> purpose?
>>
>> Yes, we can specify only "crashkernel=X,high".
>>
>> This is mentioned in Documentation/admin-guide/kernel-parameters.txt
>>
>>         crashkernel=size[KMG],low
>>                         [KNL, X86-64] range under 4G. When crashkernel=X,high
>>                         is passed, kernel could allocate physical memory region
>>                         above 4G, that cause second kernel crash on system
>>                         that require some amount of low memory, e.g. swiotlb
>>                         requires at least 64M+32K low memory, also enough extra
>>                         low memory is needed to make sure DMA buffers for 32-bit
>>                         devices won't run out. Kernel would try to allocate at     <---------
>>                         least 256M below 4G automatically.                         <---------
> 
> Yeah, that is expected becasue no crahskernel=,low is a right usage. The
> 'ret' is 0 in the case. If I gave below string, it works too.
> "crashkernel=256M,high crashkernel=aaabbadfadfd,low"

Yes, so maybe we should change the error code in __parse_crashkernel()
from "-EINVAL" to "-ENOENT" when the specified option does not exist.

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 256cf6db573cd09..395f4fac1773f28 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -243,9 +243,8 @@ static int __init __parse_crashkernel(char *cmdline,
        *crash_base = 0;

        ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
-
        if (!ck_cmdline)
-               return -EINVAL;
+               return -ENOENT;

        ck_cmdline += strlen(name);


> 
>>
>>>
>>>> +
>>>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>>>> +	}
>>>> +
>>>> +	fixed_base = !!crash_base;
>>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>>  
>>>>  	/* User specifies base address explicitly. */
>>>>  	if (crash_base)
>>>>  		crash_max = crash_base + crash_size;
>>>>  
>>>> +retry:
>>>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>>>  					       crash_base, crash_max);
>>>>  	if (!crash_base) {
>>>> +		/*
>>>> +		 * Attempt to fully allocate low memory failed, fall back
>>>> +		 * to high memory, the minimum required low memory will be
>>>> +		 * reserved later.
>>>> +		 */
>>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>>>> +			goto retry;
>>>> +		}
>>>> +
>>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>>>  			crash_size);
>>>>  		return;
>>>>  	}
>>>>  
>>>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>>>> +		memblock_phys_free(crash_base, crash_size);
>>>> +		return;
>>>> +	}
>>>> +
>>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>>>  
>>>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>>>>  	 * map. Inform kmemleak so that it won't try to access it.
>>>>  	 */
>>>>  	kmemleak_ignore_phys(crash_base);
>>>> +	if (crashk_low_res.end)
>>>> +		kmemleak_ignore_phys(crashk_low_res.start);
>>>> +
>>>>  	crashk_res.start = crash_base;
>>>>  	crashk_res.end = crash_base + crash_size - 1;
>>>>  	insert_resource(&iomem_resource, &crashk_res);
>>>> -- 
>>>> 2.25.1
>>>>
>>>
>>> .
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
>>
> 
> .
>
Baoquan He Feb. 14, 2022, 7:09 a.m. UTC | #7
On 02/14/22 at 02:44pm, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/2/11 18:51, Baoquan He wrote:
> > On 02/11/22 at 06:41pm, Leizhen (ThunderTown) wrote:
> >>
> >>
> >> On 2022/2/11 18:30, Baoquan He wrote:
> >>> On 01/24/22 at 04:47pm, Zhen Lei wrote:
> >>>> From: Chen Zhou <chenzhou10@huawei.com>
> >>> ......
> >>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >>>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> >>>> --- a/arch/arm64/mm/init.c
> >>>> +++ b/arch/arm64/mm/init.c
> >>>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
> >>>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
> >>>>  
> >>>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> >>>> +{
> >>>> +	unsigned long long low_base;
> >>>> +
> >>>> +	/* passed with crashkernel=0,low ? */
> >>>> +	if (!low_size)
> >>>> +		return 0;
> >>>> +
> >>>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> >>>> +	if (!low_base) {
> >>>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> >>>> +		return -ENOMEM;
> >>>> +	}
> >>>> +
> >>>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> >>>> +		low_base, low_base + low_size, low_size >> 20);
> >>>> +
> >>>> +	crashk_low_res.start = low_base;
> >>>> +	crashk_low_res.end   = low_base + low_size - 1;
> >>>> +	insert_resource(&iomem_resource, &crashk_low_res);
> >>>> +
> >>>> +	return 0;
> >>>> +}
> >>>> +
> >>>>  /*
> >>>>   * reserve_crashkernel() - reserves memory for crash kernel
> >>>>   *
> >>>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>>>  static void __init reserve_crashkernel(void)
> >>>>  {
> >>>>  	unsigned long long crash_base, crash_size;
> >>>> +	unsigned long long crash_low_size = SZ_256M;
> >>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> >>>>  	int ret;
> >>>> +	bool fixed_base;
> >>>> +	char *cmdline = boot_command_line;
> >>>>  
> >>>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> >>>> +	/* crashkernel=X[@offset] */
> >>>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
> >>>>  				&crash_size, &crash_base);
> >>>> -	/* no crashkernel= or invalid value specified */
> >>>> -	if (ret || !crash_size)
> >>>> -		return;
> >>>> +	if (ret || !crash_size) {
> >>>> +		unsigned long long low_size;
> >>>>  
> >>>> +		/* crashkernel=X,high */
> >>>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> >>>> +		if (ret || !crash_size)
> >>>> +			return;
> >>>> +
> >>>> +		/* crashkernel=X,low */
> >>>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> >>>> +		if (!ret)
> >>>> +			crash_low_size = low_size;
> >>>
> >>> Here, the error case is not checked and handled. But it still gets the
> >>> expected result, which is the default SZ_256M. Is this designed on
> >>> purpose?
> >>
> >> Yes, we can specify only "crashkernel=X,high".
> >>
> >> This is mentioned in Documentation/admin-guide/kernel-parameters.txt
> >>
> >>         crashkernel=size[KMG],low
> >>                         [KNL, X86-64] range under 4G. When crashkernel=X,high
> >>                         is passed, kernel could allocate physical memory region
> >>                         above 4G, that cause second kernel crash on system
> >>                         that require some amount of low memory, e.g. swiotlb
> >>                         requires at least 64M+32K low memory, also enough extra
> >>                         low memory is needed to make sure DMA buffers for 32-bit
> >>                         devices won't run out. Kernel would try to allocate at     <---------
> >>                         least 256M below 4G automatically.                         <---------
> > 
> > Yeah, that is expected because omitting crashkernel=,low is a valid usage.
> > 'ret' is 0 in that case. If I give the string below, it works too:
> > "crashkernel=256M,high crashkernel=aaabbadfadfd,low"
> 
> Yes, so maybe we should change the error code in __parse_crashkernel()
> from "-EINVAL" to "-ENOENT" when the specified option does not exist.

Good point. I also thought of this; it could be a next-step cleanup. The x86
code needs this too. With crashkernel='messy code',high, it will fail to
reserve. For consistency, we should fail crashkernel='messy code',low
too.

> 
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 256cf6db573cd09..395f4fac1773f28 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -243,9 +243,8 @@ static int __init __parse_crashkernel(char *cmdline,
>         *crash_base = 0;
> 
>         ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
> -
>         if (!ck_cmdline)
> -               return -EINVAL;
> +               return -ENOENT;
> 
>         ck_cmdline += strlen(name);
> 
> 
> > 
> >>
> >>>
> >>>> +
> >>>> +		crash_max = CRASH_ADDR_HIGH_MAX;
> >>>> +	}
> >>>> +
> >>>> +	fixed_base = !!crash_base;
> >>>>  	crash_size = PAGE_ALIGN(crash_size);
> >>>>  
> >>>>  	/* User specifies base address explicitly. */
> >>>>  	if (crash_base)
> >>>>  		crash_max = crash_base + crash_size;
> >>>>  
> >>>> +retry:
> >>>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> >>>>  					       crash_base, crash_max);
> >>>>  	if (!crash_base) {
> >>>> +		/*
> >>>> +		 * Attempt to fully allocate low memory failed, fall back
> >>>> +		 * to high memory, the minimum required low memory will be
> >>>> +		 * reserved later.
> >>>> +		 */
> >>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> >>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
> >>>> +			goto retry;
> >>>> +		}
> >>>> +
> >>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> >>>>  			crash_size);
> >>>>  		return;
> >>>>  	}
> >>>>  
> >>>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> >>>> +		memblock_phys_free(crash_base, crash_size);
> >>>> +		return;
> >>>> +	}
> >>>> +
> >>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> >>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
> >>>>  
> >>>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
> >>>>  	 * map. Inform kmemleak so that it won't try to access it.
> >>>>  	 */
> >>>>  	kmemleak_ignore_phys(crash_base);
> >>>> +	if (crashk_low_res.end)
> >>>> +		kmemleak_ignore_phys(crashk_low_res.start);
> >>>> +
> >>>>  	crashk_res.start = crash_base;
> >>>>  	crashk_res.end = crash_base + crash_size - 1;
> >>>>  	insert_resource(&iomem_resource, &crashk_res);
> >>>> -- 
> >>>> 2.25.1
> >>>>
> >>>
> >>> .
> >>>
> >>
> >> -- 
> >> Regards,
> >>   Zhen Lei
> >>
> > 
> > .
> > 
> 
> -- 
> Regards,
>   Zhen Lei
>
Leizhen (ThunderTown) Feb. 14, 2022, 7:53 a.m. UTC | #8
On 2022/2/14 11:52, Baoquan He wrote:
> On 01/24/22 at 04:47pm, Zhen Lei wrote:
> ......
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>  
>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>> +{
>> +	unsigned long long low_base;
>> +
>> +	/* passed with crashkernel=0,low ? */
>> +	if (!low_size)
>> +		return 0;
>> +
>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>> +	if (!low_base) {
>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
>> +		low_base, low_base + low_size, low_size >> 20);
>> +
>> +	crashk_low_res.start = low_base;
>> +	crashk_low_res.end   = low_base + low_size - 1;
>> +	insert_resource(&iomem_resource, &crashk_low_res);
>> +
>> +	return 0;
>> +}
>> +
>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
> 
> My another concern is the crashkernel=,low handling. In this patch, the
> code related to low memory is obscure. Wondering if we should make them
> explicit with a little redundant but very clear code flows. Saying this
> because the code must be very clear to you and reviewers, it may be
> harder for later code reader or anyone interested to understand.
> 
> 1) crashkernel=X,high
> 2) crashkernel=X,high crashkernel=Y,low
> 3) crashkernel=X,high crashkernel=0,low
> 4) crashkernel=X,high crashkernel='messy code',low
> 5) crashkernel=X //fall back to high memory, low memory is required then.
> 
> It could be me thinking about it too much. I made changes to your patch
> with a tuning, not sure if it's OK to you. Otherwise, this patchset

I think it's good.

> works very well for all above test cases, it's ripe to be merged for
> wider testing.

I will test it tomorrow. I've prepared a few more use cases than yours.

1) crashkernel=4G						//high=4G, low=256M
2) crashkernel=4G crashkernel=512M,high crashkernel=512M,low	//high=4G, low=256M, high and low are ignored
3) crashkernel=4G crashkernel=512M,high				//high=4G, low=256M, high is ignored
4) crashkernel=4G crashkernel=512M,low				//high=4G, low=256M, low is ignored
5) crashkernel=4G@0xe0000000					//high=0G, low=0M, cannot allocate, failed
6) crashkernel=512M						//high=0G, low=512M
7) crashkernel=128M						//high=0G, low=128M
8) crashkernel=512M@0xde000000		//512M@3552M		//high=0G, low=512M
9) crashkernel=4G,high						//high=4G, low=256M
a) crashkernel=4G,high crashkernel=512M,low			//high=4G, low=512M
b) crashkernel=512M,high crashkernel=128M,low			//high=512M, low=128M
c) crashkernel=512M,low						//high=0G, low=0M, invalid


> 
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index a5d43feac0d7..671862c56d7d 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -94,7 +94,8 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>  
>  	return 0;
>  }
> -
> +/*Words explaining why it's 256M*/
> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M
>  /*
>   * reserve_crashkernel() - reserves memory for crash kernel
>   *
> @@ -105,10 +106,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>  static void __init reserve_crashkernel(void)
>  {
>  	unsigned long long crash_base, crash_size;
> -	unsigned long long crash_low_size = SZ_256M;
> +	unsigned long long crash_low_size;
>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>  	int ret;
> -	bool fixed_base;
> +	bool fixed_base, high;
>  	char *cmdline = boot_command_line;
>  
>  	/* crashkernel=X[@offset] */
> @@ -126,7 +127,10 @@ static void __init reserve_crashkernel(void)
>  		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>  		if (!ret)
>  			crash_low_size = low_size;
> +		else
> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>  
> +		high = true;
>  		crash_max = CRASH_ADDR_HIGH_MAX;
>  	}
>  
> @@ -134,7 +138,7 @@ static void __init reserve_crashkernel(void)
>  	crash_size = PAGE_ALIGN(crash_size);
>  
>  	/* User specifies base address explicitly. */
> -	if (crash_base)
> +	if (fixed_base)
>  		crash_max = crash_base + crash_size;
>  
>  retry:
> @@ -156,7 +160,10 @@ static void __init reserve_crashkernel(void)
>  		return;
>  	}
>  
> -	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> +	if (crash_base >= SZ_4G && !high) 
> +		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> +
> +	if (reserve_crashkernel_low(crash_low_size)) {
>  		memblock_phys_free(crash_base, crash_size);
>  		return;
>  	}

It feels like {} needs to be added here so that the reserve_crashkernel_low() call stays inside
the "if (crash_base >= SZ_4G)" branch. The case of "crashkernel=128M" will not fall back to high
memory and does not need to reserve low memory again.

> 
>>   *
>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>  static void __init reserve_crashkernel(void)
>>  {
>>  	unsigned long long crash_base, crash_size;
>> +	unsigned long long crash_low_size = SZ_256M;
>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>  	int ret;
>> +	bool fixed_base;
>> +	char *cmdline = boot_command_line;
>>  
>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>> +	/* crashkernel=X[@offset] */
>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>  				&crash_size, &crash_base);
>> -	/* no crashkernel= or invalid value specified */
>> -	if (ret || !crash_size)
>> -		return;
>> +	if (ret || !crash_size) {
>> +		unsigned long long low_size;
>>  
>> +		/* crashkernel=X,high */
>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>> +		if (ret || !crash_size)
>> +			return;
>> +
>> +		/* crashkernel=X,low */
>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>> +		if (!ret)
>> +			crash_low_size = low_size;
>> +
>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>> +	}
>> +
>> +	fixed_base = !!crash_base;
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	/* User specifies base address explicitly. */
>>  	if (crash_base)
>>  		crash_max = crash_base + crash_size;
>>  
>> +retry:
>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>  					       crash_base, crash_max);
>>  	if (!crash_base) {
>> +		/*
>> +		 * Attempt to fully allocate low memory failed, fall back
>> +		 * to high memory, the minimum required low memory will be
>> +		 * reserved later.
>> +		 */
>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>> +			goto retry;
>> +		}
>> +
>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>  			crash_size);
>>  		return;
>>  	}
>>  
>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>> +		memblock_phys_free(crash_base, crash_size);
>> +		return;
>> +	}
>> +
>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>  
>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>>  	 * map. Inform kmemleak so that it won't try to access it.
>>  	 */
>>  	kmemleak_ignore_phys(crash_base);
>> +	if (crashk_low_res.end)
>> +		kmemleak_ignore_phys(crashk_low_res.start);
>> +
>>  	crashk_res.start = crash_base;
>>  	crashk_res.end = crash_base + crash_size - 1;
>>  	insert_resource(&iomem_resource, &crashk_res);
>> -- 
>> 2.25.1
>>
> 
> .
>
Leizhen (ThunderTown) Feb. 16, 2022, 2:58 a.m. UTC | #9
On 2022/2/14 15:53, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/2/14 11:52, Baoquan He wrote:
>> On 01/24/22 at 04:47pm, Zhen Lei wrote:
>> ......
>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
>>> --- a/arch/arm64/mm/init.c
>>> +++ b/arch/arm64/mm/init.c
>>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>>  
>>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>>> +{
>>> +	unsigned long long low_base;
>>> +
>>> +	/* passed with crashkernel=0,low ? */
>>> +	if (!low_size)
>>> +		return 0;
>>> +
>>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>>> +	if (!low_base) {
>>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
>>> +		low_base, low_base + low_size, low_size >> 20);
>>> +
>>> +	crashk_low_res.start = low_base;
>>> +	crashk_low_res.end   = low_base + low_size - 1;
>>> +	insert_resource(&iomem_resource, &crashk_low_res);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  /*
>>>   * reserve_crashkernel() - reserves memory for crash kernel
>>
>> My another concern is the crashkernel=,low handling. In this patch, the
>> code related to low memory is obscure. Wondering if we should make them
>> explicit with a little redundant but very clear code flows. Saying this
>> because the code must be very clear to you and reviewers, it may be
>> harder for later code reader or anyone interested to understand.
>>
>> 1) crashkernel=X,high
>> 2) crashkernel=X,high crashkernel=Y,low
>> 3) crashkernel=X,high crashkernel=0,low
>> 4) crashkernel=X,high crashkernel='messy code',low
>> 5) crashkernel=X //fall back to high memory, low memory is required then.
>>
>> It could be me thinking about it too much. I made changes to your patch
>> with a tuning, not sure if it's OK to you. Otherwise, this patchset
> 
> I think it's good.
> 
>> works very well for all above test cases, it's ripe to be merged for
>> wider testing.
> 
> I will test it tomorrow. I've prepared a little more use cases than yours.

After the following modifications, I have tested it and it works well. Passed
all the test cases I prepared.

> 
> 1) crashkernel=4G						//high=4G, low=256M
> 2) crashkernel=4G crashkernel=512M,high crashkernel=512M,low	//high=4G, low=256M, high and low are ignored
> 3) crashkernel=4G crashkernel=512M,high				//high=4G, low=256M, high is ignored
> 4) crashkernel=4G crashkernel=512M,low				//high=4G, low=256M, low is ignored
> 5) crashkernel=4G@0xe0000000					//high=0G, low=0M, cannot allocate, failed
> 6) crashkernel=512M						//high=0G, low=512M
> 7) crashkernel=128M						//high=0G, low=128M
> 8) crashkernel=512M@0xde000000		//512M@3552M		//high=0G, low=512M
> 9) crashkernel=4G,high						//high=4G, low=256M
> a) crashkernel=4G,high crashkernel=512M,low			//high=4G, low=512M
> b) crashkernel=512M,high crashkernel=128M,low			//high=512M, low=128M
> c) crashkernel=512M,low						//high=0G, low=0M, invalid
> 
> 
>>
>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>> index a5d43feac0d7..671862c56d7d 100644
>> --- a/arch/arm64/mm/init.c
>> +++ b/arch/arm64/mm/init.c
>> @@ -94,7 +94,8 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>  
>>  	return 0;
>>  }
>> -
>> +/*Words explaining why it's 256M*/
>> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M

It's an empirical value.

94fb9334182284e8e7e4bcb9125c25dc33af19d4 x86/crash: Allocate enough low memory when crashkernel=high

    When the crash kernel is loaded above 4GiB in memory, the
    first kernel allocates only 72MiB of low-memory for the DMA
    requirements of the second kernel. On systems with many
    devices this is not enough and causes device driver
    initialization errors and failed crash dumps. Testing by
    SUSE and Redhat has shown that 256MiB is a good default
    value for now and the discussion has lead to this value as
    well. So set this default value to 256MiB to make sure there
    is enough memory available for DMA.


>>  /*
>>   * reserve_crashkernel() - reserves memory for crash kernel
>>   *
>> @@ -105,10 +106,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>  static void __init reserve_crashkernel(void)
>>  {
>>  	unsigned long long crash_base, crash_size;
>> -	unsigned long long crash_low_size = SZ_256M;
>> +	unsigned long long crash_low_size;
>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>  	int ret;
>> -	bool fixed_base;
>> +	bool fixed_base, high;

high = false;

>>  	char *cmdline = boot_command_line;
>>  
>>  	/* crashkernel=X[@offset] */
>> @@ -126,7 +127,10 @@ static void __init reserve_crashkernel(void)
>>  		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>>  		if (!ret)
>>  			crash_low_size = low_size;
>> +		else
>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>  
>> +		high = true;
>>  		crash_max = CRASH_ADDR_HIGH_MAX;
>>  	}
>>  
>> @@ -134,7 +138,7 @@ static void __init reserve_crashkernel(void)
>>  	crash_size = PAGE_ALIGN(crash_size);
>>  
>>  	/* User specifies base address explicitly. */
>> -	if (crash_base)
>> +	if (fixed_base)
>>  		crash_max = crash_base + crash_size;
>>  
>>  retry:
>> @@ -156,7 +160,10 @@ static void __init reserve_crashkernel(void)
>>  		return;
>>  	}
>>  
>> -	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>> +	if (crash_base >= SZ_4G && !high) 
>> +		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>> +
>> +	if (reserve_crashkernel_low(crash_low_size)) {
>>  		memblock_phys_free(crash_base, crash_size);
>>  		return;
>>  	}

-       if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
-               memblock_phys_free(crash_base, crash_size);
-               return;
+       if (crash_base >= SZ_4G) {
+               if (!high)
+                       crash_low_size = SZ_256M;
+
+               if (reserve_crashkernel_low(crash_low_size)) {
+                       memblock_phys_free(crash_base, crash_size);
+                       return;
+               }
        }

It looks like changing 'high' to 'low' would be more accurate, i.e. tracking whether crashkernel=Y,low is specified.


> 
> It feels like {} may need to be added here so that it is in branch "if (crash_base >= SZ_4G)".
> The case of "crashkernel=128M" will not fall back to high memory and does not need to reserve
> low memory again.
> 
>>
>>>   *
>>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>  static void __init reserve_crashkernel(void)
>>>  {
>>>  	unsigned long long crash_base, crash_size;
>>> +	unsigned long long crash_low_size = SZ_256M;
>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>>  	int ret;
>>> +	bool fixed_base;
>>> +	char *cmdline = boot_command_line;
>>>  
>>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>>> +	/* crashkernel=X[@offset] */
>>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>>  				&crash_size, &crash_base);
>>> -	/* no crashkernel= or invalid value specified */
>>> -	if (ret || !crash_size)
>>> -		return;
>>> +	if (ret || !crash_size) {
>>> +		unsigned long long low_size;
>>>  
>>> +		/* crashkernel=X,high */
>>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>>> +		if (ret || !crash_size)
>>> +			return;
>>> +
>>> +		/* crashkernel=X,low */
>>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>>> +		if (!ret)
>>> +			crash_low_size = low_size;
>>> +
>>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>>> +	}
>>> +
>>> +	fixed_base = !!crash_base;
>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>  
>>>  	/* User specifies base address explicitly. */
>>>  	if (crash_base)
>>>  		crash_max = crash_base + crash_size;
>>>  
>>> +retry:
>>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>>  					       crash_base, crash_max);
>>>  	if (!crash_base) {
>>> +		/*
>>> +		 * Attempt to fully allocate low memory failed, fall back
>>> +		 * to high memory, the minimum required low memory will be
>>> +		 * reserved later.
>>> +		 */
>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>>> +			goto retry;
>>> +		}
>>> +
>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>>  			crash_size);
>>>  		return;
>>>  	}
>>>  
>>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>>> +		memblock_phys_free(crash_base, crash_size);
>>> +		return;
>>> +	}
>>> +
>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>>  
>>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>>>  	 * map. Inform kmemleak so that it won't try to access it.
>>>  	 */
>>>  	kmemleak_ignore_phys(crash_base);
>>> +	if (crashk_low_res.end)
>>> +		kmemleak_ignore_phys(crashk_low_res.start);
>>> +
>>>  	crashk_res.start = crash_base;
>>>  	crashk_res.end = crash_base + crash_size - 1;
>>>  	insert_resource(&iomem_resource, &crashk_res);
>>> -- 
>>> 2.25.1
>>>
>>
>> .
>>
>
Baoquan He Feb. 16, 2022, 10:20 a.m. UTC | #10
On 02/16/22 at 10:58am, Leizhen (ThunderTown) wrote:
> 
> 
> On 2022/2/14 15:53, Leizhen (ThunderTown) wrote:
> > 
> > 
> > On 2022/2/14 11:52, Baoquan He wrote:
> >> On 01/24/22 at 04:47pm, Zhen Lei wrote:
> >> ......
> >>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
> >>> --- a/arch/arm64/mm/init.c
> >>> +++ b/arch/arm64/mm/init.c
> >>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
> >>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
> >>>  
> >>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
> >>> +{
> >>> +	unsigned long long low_base;
> >>> +
> >>> +	/* passed with crashkernel=0,low ? */
> >>> +	if (!low_size)
> >>> +		return 0;
> >>> +
> >>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
> >>> +	if (!low_base) {
> >>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
> >>> +		return -ENOMEM;
> >>> +	}
> >>> +
> >>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
> >>> +		low_base, low_base + low_size, low_size >> 20);
> >>> +
> >>> +	crashk_low_res.start = low_base;
> >>> +	crashk_low_res.end   = low_base + low_size - 1;
> >>> +	insert_resource(&iomem_resource, &crashk_low_res);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>>  /*
> >>>   * reserve_crashkernel() - reserves memory for crash kernel
> >>
> >> My another concern is the crashkernel=,low handling. In this patch, the
> >> code related to low memory is obscure. Wondering if we should make them
> >> explicit with a little redundant but very clear code flows. Saying this
> >> because the code must be very clear to you and reviewers, it may be
> >> harder for later code reader or anyone interested to understand.
> >>
> >> 1) crashkernel=X,high
> >> 2) crashkernel=X,high crashkernel=Y,low
> >> 3) crashkernel=X,high crashkernel=0,low
> >> 4) crashkernel=X,high crashkernel='messy code',low
> >> 5) crashkernel=X //fall back to high memory, low memory is required then.
> >>
> >> It could be me thinking about it too much. I made changes to your patch
> >> with a tuning, not sure if it's OK to you. Otherwise, this patchset
> > 
> > I think it's good.
> > 
> >> works very well for all above test cases, it's ripe to be merged for
> >> wider testing.
> > 
> > I will test it tomorrow. I've prepared a little more use cases than yours.
> 
> After the following modifications, I have tested it and it works well. Passed
> all the test cases I prepared.

That's great.

You might need to add 'crashkernel=xM, crashkernel=0,low',
'crashkernel=xM, crashkernel='messy code',low' to your test cases.

> 
> > 
> > 1) crashkernel=4G						//high=4G, low=256M
> > 2) crashkernel=4G crashkernel=512M,high crashkernel=512M,low	//high=4G, low=256M, high and low are ignored
> > 3) crashkernel=4G crashkernel=512M,high				//high=4G, low=256M, high is ignored
> > 4) crashkernel=4G crashkernel=512M,low				//high=4G, low=256M, low is ignored
> > 5) crashkernel=4G@0xe0000000					//high=0G, low=0M, cannot allocate, failed
> > 6) crashkernel=512M						//high=0G, low=512M
> > 7) crashkernel=128M						//high=0G, low=128M
> > 8) crashkernel=512M@0xde000000		//512M@3552M		//high=0G, low=512M
> > 9) crashkernel=4G,high						//high=4G, low=256M
> > a) crashkernel=4G,high crashkernel=512M,low			//high=4G, low=512M
> > b) crashkernel=512M,high crashkernel=128M,low			//high=512M, low=128M
> > c) crashkernel=512M,low						//high=0G, low=0M, invalid
> > 
> > 
> >>
> >> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> >> index a5d43feac0d7..671862c56d7d 100644
> >> --- a/arch/arm64/mm/init.c
> >> +++ b/arch/arm64/mm/init.c
> >> @@ -94,7 +94,8 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
> >>  
> >>  	return 0;
> >>  }
> >> -
> >> +/*Words explaining why it's 256M*/
> >> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M
> 
> It's an empirical value.
> 
> 94fb9334182284e8e7e4bcb9125c25dc33af19d4 x86/crash: Allocate enough low memory when crashkernel=high
> 
>     When the crash kernel is loaded above 4GiB in memory, the
>     first kernel allocates only 72MiB of low-memory for the DMA
>     requirements of the second kernel. On systems with many
>     devices this is not enough and causes device driver
>     initialization errors and failed crash dumps. Testing by
>     SUSE and Redhat has shown that 256MiB is a good default
>     value for now and the discussion has lead to this value as
>     well. So set this default value to 256MiB to make sure there
>     is enough memory available for DMA.

Then some words like the ones below can be added. I am not confident they are
good enough; I hope someone else can help polish them.

/*
 * This is an empirical value in x86_64 and taken here directly. Please
 * refer to code comment in reserve_crashkernel_low() of x86_64 for more
 * details.
 */
#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M

> 
> 
> >>  /*
> >>   * reserve_crashkernel() - reserves memory for crash kernel
> >>   *
> >> @@ -105,10 +106,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
> >>  static void __init reserve_crashkernel(void)
> >>  {
> >>  	unsigned long long crash_base, crash_size;
> >> -	unsigned long long crash_low_size = SZ_256M;
> >> +	unsigned long long crash_low_size;
> >>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> >>  	int ret;
> >> -	bool fixed_base;
> >> +	bool fixed_base, high;
> 
> high = false;
> 
> >>  	char *cmdline = boot_command_line;
> >>  
> >>  	/* crashkernel=X[@offset] */
> >> @@ -126,7 +127,10 @@ static void __init reserve_crashkernel(void)
> >>  		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> >>  		if (!ret)
> >>  			crash_low_size = low_size;
> >> +		else
> >> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >>  
> >> +		high = true;
> >>  		crash_max = CRASH_ADDR_HIGH_MAX;
> >>  	}
> >>  
> >> @@ -134,7 +138,7 @@ static void __init reserve_crashkernel(void)
> >>  	crash_size = PAGE_ALIGN(crash_size);
> >>  
> >>  	/* User specifies base address explicitly. */
> >> -	if (crash_base)
> >> +	if (fixed_base)
> >>  		crash_max = crash_base + crash_size;
> >>  
> >>  retry:
> >> @@ -156,7 +160,10 @@ static void __init reserve_crashkernel(void)
> >>  		return;
> >>  	}
> >>  
> >> -	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> >> +	if (crash_base >= SZ_4G && !high) 
> >> +		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> >> +
> >> +	if (reserve_crashkernel_low(crash_low_size)) {
> >>  		memblock_phys_free(crash_base, crash_size);
> >>  		return;
> >>  	}
> 
> -       if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> -               memblock_phys_free(crash_base, crash_size);
> -               return;
> +       if (crash_base >= SZ_4G) {
> +               if (!high)
> +                       crash_low_size = SZ_256M;
> +
> +               if (reserve_crashkernel_low(crash_low_size)) {
> +                       memblock_phys_free(crash_base, crash_size);
> +                       return;
> +               }
>         }
> 
> Looks like changing 'high' to 'low' would be more accurate. Whether crashkernel=Y,low is specified.

What I meant is something like below; we can even add code comments to make it
clearer.

/*
 * reserve_crashkernel() - sketch of the proposed reservation flow
 *
 * Local variable declarations are omitted from this sketch. The flow is:
 * parse "crashkernel=X[@offset]"; if that yields nothing usable, parse
 * "crashkernel=X,high" and "crashkernel=Y,low"; allocate the main region,
 * retrying with CRASH_ADDR_HIGH_MAX when a non-fixed allocation limited to
 * CRASH_ADDR_LOW_MAX fails; reserve the low region; then record the result
 * in crashk_res and insert it into iomem_resource.
 */
static void __init reserve_crashkernel(void)
{

        /* crashkernel=X[@offset] */
        ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
                                &crash_size, &crash_base);
        if (ret || !crash_size) {
                unsigned long long low_size;

                /* crashkernel=X,high */
                ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
                if (ret || !crash_size)
                        return;

                /* crashkernel=X,low */
                ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
		/* case #1: crashkernel=Y,low was parsed successfully from the command line */
                if (!ret)
                        crash_low_size = low_size;
		else /* case #2: parse_crashkernel_low() returned non-zero, use the default size */
                        crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;

		/* 'high' records that crashkernel=X,high was specified explicitly */
		high = true;
                crash_max = CRASH_ADDR_HIGH_MAX;
        }

        /* Remember whether a base was given before crash_base is overwritten below. */
        fixed_base = !!crash_base;
        crash_size = PAGE_ALIGN(crash_size);

        /* User specifies base address explicitly. */
        if (crash_base)
                crash_max = crash_base + crash_size;
retry:
        crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
                                               crash_base, crash_max);
        if (!crash_base) {
                /*
                 * Attempt to fully allocate low memory failed, fall back
                 * to high memory, the minimum required low memory will be
                 * reserved later.
                 */
                if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
                        crash_max = CRASH_ADDR_HIGH_MAX;
                        goto retry;
                }

                pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
                        crash_size);
                return;
        }


	/*
	 * case #3: the crashkernel region was obtained from high memory via
	 * the fallback above (no crashkernel=X,high given), so set the low
	 * size to the default as well.
	 *
	 * NOTE(review): the reserve_crashkernel_low() call below is not
	 * guarded by crash_base >= SZ_4G. For a plain crashkernel=X that is
	 * allocated below 4G, no assignment to crash_low_size is visible in
	 * this sketch - confirm its initial value, or move the call inside
	 * the crash_base >= SZ_4G check.
	 */
        if (crash_base >= SZ_4G && !high)
		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;	

        /* If the low reservation fails, give back the main region and bail out. */
        if (reserve_crashkernel_low(crash_low_size)) {
                memblock_phys_free(crash_base, crash_size);
                return;
        }

        pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
                crash_base, crash_base + crash_size, crash_size >> 20);

        /*
         * The crashkernel memory will be removed from the kernel linear
         * map. Inform kmemleak so that it won't try to access it.
         */
        kmemleak_ignore_phys(crash_base);
        /* A non-zero crashk_low_res.end is used as "a low region was reserved". */
        if (crashk_low_res.end)
                kmemleak_ignore_phys(crashk_low_res.start);

        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
        insert_resource(&iomem_resource, &crashk_res);
}


> 
> 
> > 
> > It feels like {} may need to be added here so that it is in branch "if (crash_base >= SZ_4G)".
> > The case of "crashkernel=128M" will not fall back to high memory and does not need to reserve
> > low memory again.
> > 
> >>
> >>>   *
> >>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
> >>>  static void __init reserve_crashkernel(void)
> >>>  {
> >>>  	unsigned long long crash_base, crash_size;
> >>> +	unsigned long long crash_low_size = SZ_256M;
> >>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
> >>>  	int ret;
> >>> +	bool fixed_base;
> >>> +	char *cmdline = boot_command_line;
> >>>  
> >>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
> >>> +	/* crashkernel=X[@offset] */
> >>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
> >>>  				&crash_size, &crash_base);
> >>> -	/* no crashkernel= or invalid value specified */
> >>> -	if (ret || !crash_size)
> >>> -		return;
> >>> +	if (ret || !crash_size) {
> >>> +		unsigned long long low_size;
> >>>  
> >>> +		/* crashkernel=X,high */
> >>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
> >>> +		if (ret || !crash_size)
> >>> +			return;
> >>> +
> >>> +		/* crashkernel=X,low */
> >>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> >>> +		if (!ret)
> >>> +			crash_low_size = low_size;
> >>> +
> >>> +		crash_max = CRASH_ADDR_HIGH_MAX;
> >>> +	}
> >>> +
> >>> +	fixed_base = !!crash_base;
> >>>  	crash_size = PAGE_ALIGN(crash_size);
> >>>  
> >>>  	/* User specifies base address explicitly. */
> >>>  	if (crash_base)
> >>>  		crash_max = crash_base + crash_size;
> >>>  
> >>> +retry:
> >>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
> >>>  					       crash_base, crash_max);
> >>>  	if (!crash_base) {
> >>> +		/*
> >>> +		 * Attempt to fully allocate low memory failed, fall back
> >>> +		 * to high memory, the minimum required low memory will be
> >>> +		 * reserved later.
> >>> +		 */
> >>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
> >>> +			crash_max = CRASH_ADDR_HIGH_MAX;
> >>> +			goto retry;
> >>> +		}
> >>> +
> >>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
> >>>  			crash_size);
> >>>  		return;
> >>>  	}
> >>>  
> >>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
> >>> +		memblock_phys_free(crash_base, crash_size);
> >>> +		return;
> >>> +	}
> >>> +
> >>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
> >>>  		crash_base, crash_base + crash_size, crash_size >> 20);
> >>>  
> >>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
> >>>  	 * map. Inform kmemleak so that it won't try to access it.
> >>>  	 */
> >>>  	kmemleak_ignore_phys(crash_base);
> >>> +	if (crashk_low_res.end)
> >>> +		kmemleak_ignore_phys(crashk_low_res.start);
> >>> +
> >>>  	crashk_res.start = crash_base;
> >>>  	crashk_res.end = crash_base + crash_size - 1;
> >>>  	insert_resource(&iomem_resource, &crashk_res);
> >>> -- 
> >>> 2.25.1
> >>>
> >>
> >> .
> >>
> > 
> 
> -- 
> Regards,
>   Zhen Lei
>
Leizhen (ThunderTown) Feb. 17, 2022, 1:57 a.m. UTC | #11
On 2022/2/16 18:20, Baoquan He wrote:
> On 02/16/22 at 10:58am, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2022/2/14 15:53, Leizhen (ThunderTown) wrote:
>>>
>>>
>>> On 2022/2/14 11:52, Baoquan He wrote:
>>>> On 01/24/22 at 04:47pm, Zhen Lei wrote:
>>>> ......
>>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>>> index 6c653a2c7cff052..a5d43feac0d7d96 100644
>>>>> --- a/arch/arm64/mm/init.c
>>>>> +++ b/arch/arm64/mm/init.c
>>>>> @@ -71,6 +71,30 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>>  #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
>>>>>  #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
>>>>>  
>>>>> +static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>>> +{
>>>>> +	unsigned long long low_base;
>>>>> +
>>>>> +	/* passed with crashkernel=0,low ? */
>>>>> +	if (!low_size)
>>>>> +		return 0;
>>>>> +
>>>>> +	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
>>>>> +	if (!low_base) {
>>>>> +		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
>>>>> +		return -ENOMEM;
>>>>> +	}
>>>>> +
>>>>> +	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
>>>>> +		low_base, low_base + low_size, low_size >> 20);
>>>>> +
>>>>> +	crashk_low_res.start = low_base;
>>>>> +	crashk_low_res.end   = low_base + low_size - 1;
>>>>> +	insert_resource(&iomem_resource, &crashk_low_res);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>  /*
>>>>>   * reserve_crashkernel() - reserves memory for crash kernel
>>>>
>>>> My another concern is the crashkernel=,low handling. In this patch, the
>>>> code related to low memory is obscure. Wondering if we should make them
>>>> explicit with a little redundant but very clear code flows. Saying this
>>>> because the code must be very clear to you and reviewers, it may be
>>>> harder for later code reader or anyone interested to understand.
>>>>
>>>> 1) crashkernel=X,high
>>>> 2) crashkernel=X,high crashkernel=Y,low
>>>> 3) crashkernel=X,high crashkernel=0,low
>>>> 4) crashkernel=X,high crashkernel='messy code',low
>>>> 5) crashkernel=X //fall back to high memory, low memory is required then.
>>>>
>>>> It could be me thinking about it too much. I made changes to your patch
>>>> with a tuning, not sure if it's OK to you. Otherwise, this patchset
>>>
>>> I think it's good.
>>>
>>>> works very well for all above test cases, it's ripe to be merged for
>>>> wider testing.
>>>
>>> I will test it tomorrow. I've prepared a little more use cases than yours.
>>
>> After the following modifications, I have tested it and it works well. Passed
>> all the test cases I prepared.
> 
> That's great.
> 
> You might need to add 'crashkernel=xM, crashkernel=0,low',
> 'crashkernel=xM, crashkernel='messy code',low' to your test cases.

Oh, right, I will add them.

> 
>>
>>>
>>> 1) crashkernel=4G						//high=4G, low=256M
>>> 2) crashkernel=4G crashkernel=512M,high crashkernel=512M,low	//high=4G, low=256M, high and low are ignored
>>> 3) crashkernel=4G crashkernel=512M,high				//high=4G, low=256M, high is ignored
>>> 4) crashkernel=4G crashkernel=512M,low				//high=4G, low=256M, low is ignored
>>> 5) crashkernel=4G@0xe0000000					//high=0G, low=0M, cannot allocate, failed
>>> 6) crashkernel=512M						//high=0G, low=512M
>>> 7) crashkernel=128M						//high=0G, low=128M
>>> 8) crashkernel=512M@0xde000000		//512M@3552M		//high=0G, low=512M
>>> 9) crashkernel=4G,high						//high=4G, low=256M
>>> a) crashkernel=4G,high crashkernel=512M,low			//high=4G, low=512M
>>> b) crashkernel=512M,high crashkernel=128M,low			//high=512M, low=128M
>>> c) crashkernel=512M,low						//high=0G, low=0M, invalid
>>>
>>>
>>>>
>>>> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
>>>> index a5d43feac0d7..671862c56d7d 100644
>>>> --- a/arch/arm64/mm/init.c
>>>> +++ b/arch/arm64/mm/init.c
>>>> @@ -94,7 +94,8 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>>  
>>>>  	return 0;
>>>>  }
>>>> -
>>>> +/*Words explaining why it's 256M*/
>>>> +#define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M
>>
>> It's an empirical value.
>>
>> 94fb9334182284e8e7e4bcb9125c25dc33af19d4 x86/crash: Allocate enough low memory when crashkernel=high
>>
>>     When the crash kernel is loaded above 4GiB in memory, the
>>     first kernel allocates only 72MiB of low-memory for the DMA
>>     requirements of the second kernel. On systems with many
>>     devices this is not enough and causes device driver
>>     initialization errors and failed crash dumps. Testing by
>>     SUSE and Redhat has shown that 256MiB is a good default
>>     value for now and the discussion has lead to this value as
>>     well. So set this default value to 256MiB to make sure there
>>     is enough memory available for DMA.
> 
> Then some words like the ones below can be added. I am not confident they are
> good enough; I hope someone else can help polish them.
> 
> /*
>  * This is an empirical value in x86_64 and taken here directly. Please
>  * refer to code comment in reserve_crashkernel_low() of x86_64 for more
>  * details.
>  */
> #define DEFAULT_CRASH_KERNEL_LOW_SIZE SZ_256M

I think it's good. If no correction is made, I will use it.

"code comment" --> "the code comment"

> 
>>
>>
>>>>  /*
>>>>   * reserve_crashkernel() - reserves memory for crash kernel
>>>>   *
>>>> @@ -105,10 +106,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
>>>>  static void __init reserve_crashkernel(void)
>>>>  {
>>>>  	unsigned long long crash_base, crash_size;
>>>> -	unsigned long long crash_low_size = SZ_256M;
>>>> +	unsigned long long crash_low_size;
>>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>>>  	int ret;
>>>> -	bool fixed_base;
>>>> +	bool fixed_base, high;
>>
>> high = false;
>>
>>>>  	char *cmdline = boot_command_line;
>>>>  
>>>>  	/* crashkernel=X[@offset] */
>>>> @@ -126,7 +127,10 @@ static void __init reserve_crashkernel(void)
>>>>  		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>>>>  		if (!ret)
>>>>  			crash_low_size = low_size;
>>>> +		else
>>>> +			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>>  
>>>> +		high = true;
>>>>  		crash_max = CRASH_ADDR_HIGH_MAX;
>>>>  	}
>>>>  
>>>> @@ -134,7 +138,7 @@ static void __init reserve_crashkernel(void)
>>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>>  
>>>>  	/* User specifies base address explicitly. */
>>>> -	if (crash_base)
>>>> +	if (fixed_base)
>>>>  		crash_max = crash_base + crash_size;
>>>>  
>>>>  retry:
>>>> @@ -156,7 +160,10 @@ static void __init reserve_crashkernel(void)
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>>>> +	if (crash_base >= SZ_4G && !high) 
>>>> +		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
>>>> +
>>>> +	if (reserve_crashkernel_low(crash_low_size)) {
>>>>  		memblock_phys_free(crash_base, crash_size);
>>>>  		return;
>>>>  	}
>>
>> -       if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>> -               memblock_phys_free(crash_base, crash_size);
>> -               return;
>> +       if (crash_base >= SZ_4G) {
>> +               if (!high)
>> +                       crash_low_size = SZ_256M;
>> +
>> +               if (reserve_crashkernel_low(crash_low_size)) {
>> +                       memblock_phys_free(crash_base, crash_size);
>> +                       return;
>> +               }
>>         }
>>
>> Looks like changing 'high' to 'low' would be more accurate. Whether crashkernel=Y,low is specified.
> 
> What I meant is something like below; we can even add code comments to make it
> clearer.

OK, I got it. I'll add the necessary comments. Thanks.

> 
> static void __init reserve_crashkernel(void)
> {
> 
>         /* crashkernel=X[@offset] */
>         ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>                                 &crash_size, &crash_base);
>         if (ret || !crash_size) {
>                 unsigned long long low_size;
> 
>                 /* crashkernel=X,high */
>                 ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>                 if (ret || !crash_size)
>                         return;
> 
>                 /* crashkernel=X,low */
>                 ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
> 		//case #1, crashkernel=yM,low is specified explicitly in cmdline
>                 if (!ret)
>                         crash_low_size = low_size;
> 		else //case #2, crashkernel=yM,low is not specified explicitly
>                         crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
> 
> 		//high means crashkernel,high is specified explicitly
> 		high = true;
>                 crash_max = CRASH_ADDR_HIGH_MAX;
>         }
> 
>         fixed_base = !!crash_base;
>         crash_size = PAGE_ALIGN(crash_size);
> 
>         /* User specifies base address explicitly. */
>         if (crash_base)
>                 crash_max = crash_base + crash_size;
> retry:
>         crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>                                                crash_base, crash_max);
>         if (!crash_base) {
>                 /*
>                  * Attempt to fully allocate low memory failed, fall back
>                  * to high memory, the minimum required low memory will be
>                  * reserved later.
>                  */
>                 if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>                         crash_max = CRASH_ADDR_HIGH_MAX;
>                         goto retry;
>                 }
> 
>                 pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>                         crash_size);
>                 return;
>         }
> 
> 
> 	//case #3: get crashkernel from high memory through fallback, let's set crashkernel,low too.
>         if (crash_base >= SZ_4G && !high)
> 		crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;	
> 
>         if (reserve_crashkernel_low(crash_low_size)) {
>                 memblock_phys_free(crash_base, crash_size);
>                 return;
>         }
> 
>         pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>                 crash_base, crash_base + crash_size, crash_size >> 20);
> 
>         /*
>          * The crashkernel memory will be removed from the kernel linear
>          * map. Inform kmemleak so that it won't try to access it.
>          */
>         kmemleak_ignore_phys(crash_base);
>         if (crashk_low_res.end)
>                 kmemleak_ignore_phys(crashk_low_res.start);
> 
>         crashk_res.start = crash_base;
>         crashk_res.end = crash_base + crash_size - 1;
>         insert_resource(&iomem_resource, &crashk_res);
> }
> 
> 
>>
>>
>>>
>>> It feels like {} may need to be added here so that it is in branch "if (crash_base >= SZ_4G)".
>>> The case of "crashkernel=128M" will not fall back to high memory and does not need to reserve
>>> low memory again.
>>>
>>>>
>>>>>   *
>>>>> @@ -81,29 +105,62 @@ phys_addr_t arm64_dma_phys_limit __ro_after_init;
>>>>>  static void __init reserve_crashkernel(void)
>>>>>  {
>>>>>  	unsigned long long crash_base, crash_size;
>>>>> +	unsigned long long crash_low_size = SZ_256M;
>>>>>  	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
>>>>>  	int ret;
>>>>> +	bool fixed_base;
>>>>> +	char *cmdline = boot_command_line;
>>>>>  
>>>>> -	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
>>>>> +	/* crashkernel=X[@offset] */
>>>>> +	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
>>>>>  				&crash_size, &crash_base);
>>>>> -	/* no crashkernel= or invalid value specified */
>>>>> -	if (ret || !crash_size)
>>>>> -		return;
>>>>> +	if (ret || !crash_size) {
>>>>> +		unsigned long long low_size;
>>>>>  
>>>>> +		/* crashkernel=X,high */
>>>>> +		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
>>>>> +		if (ret || !crash_size)
>>>>> +			return;
>>>>> +
>>>>> +		/* crashkernel=X,low */
>>>>> +		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
>>>>> +		if (!ret)
>>>>> +			crash_low_size = low_size;
>>>>> +
>>>>> +		crash_max = CRASH_ADDR_HIGH_MAX;
>>>>> +	}
>>>>> +
>>>>> +	fixed_base = !!crash_base;
>>>>>  	crash_size = PAGE_ALIGN(crash_size);
>>>>>  
>>>>>  	/* User specifies base address explicitly. */
>>>>>  	if (crash_base)
>>>>>  		crash_max = crash_base + crash_size;
>>>>>  
>>>>> +retry:
>>>>>  	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
>>>>>  					       crash_base, crash_max);
>>>>>  	if (!crash_base) {
>>>>> +		/*
>>>>> +		 * Attempt to fully allocate low memory failed, fall back
>>>>> +		 * to high memory, the minimum required low memory will be
>>>>> +		 * reserved later.
>>>>> +		 */
>>>>> +		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
>>>>> +			crash_max = CRASH_ADDR_HIGH_MAX;
>>>>> +			goto retry;
>>>>> +		}
>>>>> +
>>>>>  		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
>>>>>  			crash_size);
>>>>>  		return;
>>>>>  	}
>>>>>  
>>>>> +	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
>>>>> +		memblock_phys_free(crash_base, crash_size);
>>>>> +		return;
>>>>> +	}
>>>>> +
>>>>>  	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
>>>>>  		crash_base, crash_base + crash_size, crash_size >> 20);
>>>>>  
>>>>> @@ -112,6 +169,9 @@ static void __init reserve_crashkernel(void)
>>>>>  	 * map. Inform kmemleak so that it won't try to access it.
>>>>>  	 */
>>>>>  	kmemleak_ignore_phys(crash_base);
>>>>> +	if (crashk_low_res.end)
>>>>> +		kmemleak_ignore_phys(crashk_low_res.start);
>>>>> +
>>>>>  	crashk_res.start = crash_base;
>>>>>  	crashk_res.end = crash_base + crash_size - 1;
>>>>>  	insert_resource(&iomem_resource, &crashk_res);
>>>>> -- 
>>>>> 2.25.1
>>>>>
>>>>
>>>> .
>>>>
>>>
>>
>> -- 
>> Regards,
>>   Zhen Lei
>>
> 
> .
>
diff mbox series

Patch

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index e16b248699d5c3c..19c2d487cb08feb 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -329,8 +329,13 @@  bool crash_is_nosave(unsigned long pfn)
 
 	/* in reserved memory? */
 	addr = __pfn_to_phys(pfn);
-	if ((addr < crashk_res.start) || (crashk_res.end < addr))
-		return false;
+	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
+		if (!crashk_low_res.end)
+			return false;
+
+		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
+			return false;
+	}
 
 	if (!kexec_crash_image)
 		return true;
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 59c648d51848886..889951291cc0f9c 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -65,10 +65,18 @@  static int prepare_elf_headers(void **addr, unsigned long *sz)
 
 	/* Exclude crashkernel region */
 	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		goto out;
+
+	if (crashk_low_res.end) {
+		ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		if (ret)
+			goto out;
+	}
 
-	if (!ret)
-		ret =  crash_prepare_elf64_headers(cmem, true, addr, sz);
+	ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
 
+out:
 	kfree(cmem);
 	return ret;
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6c653a2c7cff052..a5d43feac0d7d96 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -71,6 +71,30 @@  phys_addr_t arm64_dma_phys_limit __ro_after_init;
 #define CRASH_ADDR_LOW_MAX	arm64_dma_phys_limit
 #define CRASH_ADDR_HIGH_MAX	MEMBLOCK_ALLOC_ACCESSIBLE
 
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+	unsigned long long low_base;
+
+	/* passed with crashkernel=0,low ? */
+	if (!low_size)
+		return 0;
+
+	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+	if (!low_base) {
+		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+		return -ENOMEM;
+	}
+
+	pr_info("crashkernel low memory reserved: 0x%llx - 0x%llx (%lld MB)\n",
+		low_base, low_base + low_size, low_size >> 20);
+
+	crashk_low_res.start = low_base;
+	crashk_low_res.end   = low_base + low_size - 1;
+	insert_resource(&iomem_resource, &crashk_low_res);
+
+	return 0;
+}
+
 /*
  * reserve_crashkernel() - reserves memory for crash kernel
  *
@@ -81,29 +105,62 @@  phys_addr_t arm64_dma_phys_limit __ro_after_init;
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_base, crash_size;
+	unsigned long long crash_low_size = SZ_256M;
 	unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
 	int ret;
+	bool fixed_base;
+	char *cmdline = boot_command_line;
 
-	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+	/* crashkernel=X[@offset] */
+	ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
 				&crash_size, &crash_base);
-	/* no crashkernel= or invalid value specified */
-	if (ret || !crash_size)
-		return;
+	if (ret || !crash_size) {
+		unsigned long long low_size;
 
+		/* crashkernel=X,high */
+		ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
+		if (ret || !crash_size)
+			return;
+
+		/* crashkernel=X,low */
+		ret = parse_crashkernel_low(cmdline, 0, &low_size, &crash_base);
+		if (!ret)
+			crash_low_size = low_size;
+
+		crash_max = CRASH_ADDR_HIGH_MAX;
+	}
+
+	fixed_base = !!crash_base;
 	crash_size = PAGE_ALIGN(crash_size);
 
 	/* User specifies base address explicitly. */
 	if (crash_base)
 		crash_max = crash_base + crash_size;
 
+retry:
 	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
 					       crash_base, crash_max);
 	if (!crash_base) {
+		/*
+		 * Attempt to fully allocate low memory failed, fall back
+		 * to high memory, the minimum required low memory will be
+		 * reserved later.
+		 */
+		if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
+			crash_max = CRASH_ADDR_HIGH_MAX;
+			goto retry;
+		}
+
 		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
 			crash_size);
 		return;
 	}
 
+	if (crash_base >= SZ_4G && reserve_crashkernel_low(crash_low_size)) {
+		memblock_phys_free(crash_base, crash_size);
+		return;
+	}
+
 	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
 		crash_base, crash_base + crash_size, crash_size >> 20);
 
@@ -112,6 +169,9 @@  static void __init reserve_crashkernel(void)
 	 * map. Inform kmemleak so that it won't try to access it.
 	 */
 	kmemleak_ignore_phys(crash_base);
+	if (crashk_low_res.end)
+		kmemleak_ignore_phys(crashk_low_res.start);
+
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);