diff mbox series

[V2] powerpc/memhotplug: Add add_pages override for PPC

Message ID 20220627072708.75662-1-aneesh.kumar@linux.ibm.com (mailing list archive)
State New
Headers show
Series [V2] powerpc/memhotplug: Add add_pages override for PPC | expand

Commit Message

Aneesh Kumar K.V June 27, 2022, 7:27 a.m. UTC
With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
the kernel now validate the addr against high_memory value. This results
in the below BUG_ON with dax pfns.

[  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
1:mon> e
cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
    pc: c00000000055ed48: free_pages.part.0+0x48/0x110
    lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
    sp: c0000000072878d0
   msr: 800000000282b033
  current = 0xc00000000afabe00
  paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
    pid   = 26531, comm = 50-landscape-sy
kernel BUG at :5521!
Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
1:mon> t
[link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
[c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
[c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
[c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
[c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
[c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
[c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
[c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
[c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
[c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
[c000000007287e10] c00000000000c53c system_call_common+0xec/0x250

The fix is to make sure we update high_memory on memory hotplug.
This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")

Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/Kconfig  |  4 ++++
 arch/powerpc/mm/mem.c | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

Comments

Michael Ellerman June 28, 2022, 12:56 p.m. UTC | #1
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> the kernel now validate the addr against high_memory value. This results
> in the below BUG_ON with dax pfns.
>
> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
> 1:mon> e
> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>     pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>     lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>     sp: c0000000072878d0
>    msr: 800000000282b033
>   current = 0xc00000000afabe00
>   paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>     pid   = 26531, comm = 50-landscape-sy
> kernel BUG at :5521!
> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
> 1:mon> t
> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
>
> The fix is to make sure we update high_memory on memory hotplug.
> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
>
> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
 
...

> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 52b77684acda..2a63920c369d 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -105,6 +105,36 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>  	vm_unmap_aliases();
>  }
>  
> +/*
> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
> + * updating.
> + */
> +static void update_end_of_memory_vars(u64 start, u64 size)
> +{
> +	unsigned long end_pfn = PFN_UP(start + size);
> +
> +	if (end_pfn > max_pfn) {
> +		max_pfn = end_pfn;
> +		max_low_pfn = end_pfn;
> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
> +	}
> +}
> +
> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
> +		    struct mhp_params *params)
> +{
> +	int ret;
> +
> +	ret = __add_pages(nid, start_pfn, nr_pages, params);
> +	WARN_ON_ONCE(ret);

What's the justification for making this a WARN_ON_ONCE(), and then
continuing to update the variables anyway?

I realise that's what x86 does, but it seems kind of wrong.

cheers

> +	/* update max_pfn, max_low_pfn and high_memory */
> +	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
> +				  nr_pages << PAGE_SHIFT);
> +
> +	return ret;
> +}
Aneesh Kumar K.V June 28, 2022, 2:43 p.m. UTC | #2
On 6/28/22 6:26 PM, Michael Ellerman wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> With commit ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>> the kernel now validate the addr against high_memory value. This results
>> in the below BUG_ON with dax pfns.
>>
>> [  635.798741][T26531] kernel BUG at mm/page_alloc.c:5521!
>> 1:mon> e
>> cpu 0x1: Vector: 700 (Program Check) at [c000000007287630]
>>     pc: c00000000055ed48: free_pages.part.0+0x48/0x110
>>     lr: c00000000053ca70: tlb_finish_mmu+0x80/0xd0
>>     sp: c0000000072878d0
>>    msr: 800000000282b033
>>   current = 0xc00000000afabe00
>>   paca    = 0xc00000037ffff300   irqmask: 0x03   irq_happened: 0x05
>>     pid   = 26531, comm = 50-landscape-sy
>> kernel BUG at :5521!
>> Linux version 5.19.0-rc3-14659-g4ec05be7c2e1 (kvaneesh@ltc-boston8) (gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #625 SMP Thu Jun 23 00:35:43 CDT 2022
>> 1:mon> t
>> [link register   ] c00000000053ca70 tlb_finish_mmu+0x80/0xd0
>> [c0000000072878d0] c00000000053ca54 tlb_finish_mmu+0x64/0xd0 (unreliable)
>> [c000000007287900] c000000000539424 exit_mmap+0xe4/0x2a0
>> [c0000000072879e0] c00000000019fc1c mmput+0xcc/0x210
>> [c000000007287a20] c000000000629230 begin_new_exec+0x5e0/0xf40
>> [c000000007287ae0] c00000000070b3cc load_elf_binary+0x3ac/0x1e00
>> [c000000007287c10] c000000000627af0 bprm_execve+0x3b0/0xaf0
>> [c000000007287cd0] c000000000628414 do_execveat_common.isra.0+0x1e4/0x310
>> [c000000007287d80] c00000000062858c sys_execve+0x4c/0x60
>> [c000000007287db0] c00000000002c1b0 system_call_exception+0x160/0x2c0
>> [c000000007287e10] c00000000000c53c system_call_common+0xec/0x250
>>
>> The fix is to make sure we update high_memory on memory hotplug.
>> This is similar to what x86 does in commit 3072e413e305 ("mm/memory_hotplug: introduce add_pages")
>>
>> Fixes: ffa0b64e3be5 ("powerpc: Fix virt_addr_valid() for 64-bit Book3E & 32-bit")
>> Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
>> Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>  
> ...
> 
>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index 52b77684acda..2a63920c369d 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -105,6 +105,36 @@ void __ref arch_remove_linear_mapping(u64 start, u64 size)
>>  	vm_unmap_aliases();
>>  }
>>  
>> +/*
>> + * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
>> + * updating.
>> + */
>> +static void update_end_of_memory_vars(u64 start, u64 size)
>> +{
>> +	unsigned long end_pfn = PFN_UP(start + size);
>> +
>> +	if (end_pfn > max_pfn) {
>> +		max_pfn = end_pfn;
>> +		max_low_pfn = end_pfn;
>> +		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
>> +	}
>> +}
>> +
>> +int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
>> +		    struct mhp_params *params)
>> +{
>> +	int ret;
>> +
>> +	ret = __add_pages(nid, start_pfn, nr_pages, params);
>> +	WARN_ON_ONCE(ret);
> 
> What's the justification for making this a WARN_ON_ONCE(), and then
> continuing to update the variables anyway?
> 
> I realise that's what x86 does, but it seems kind of wrong.
> 
>

That is copy paste bug from my side. I guess we should skip updating vars on __add_pages failure.
WARN_ON_ONCE() was done as part of fe8b868eccb9f85a0e231e35f0abac5b39bac801. The original code 
did print a WARN all the time. So the above commit made it conditional. Later in ea0854170c95245a258b386c7a9314399c949fe0
the return value was never checked. So the error path is not handled there at all. 

we should do 

if (!ret) update_end_of_memory_vars(...); 

Do you want a v3 with the changes? 

-aneesh
diff mbox series

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c2ce2e60c8f0..7aa12e88c580 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -358,6 +358,10 @@  config ARCH_SUSPEND_NONZERO_CPU
 	def_bool y
 	depends on PPC_POWERNV || PPC_PSERIES
 
+config ARCH_HAS_ADD_PAGES
+	def_bool y
+	depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
 config PPC_DCR_NATIVE
 	bool
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 52b77684acda..2a63920c369d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -105,6 +105,36 @@  void __ref arch_remove_linear_mapping(u64 start, u64 size)
 	vm_unmap_aliases();
 }
 
+/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+	unsigned long end_pfn = PFN_UP(start + size);
+
+	if (end_pfn > max_pfn) {
+		max_pfn = end_pfn;
+		max_low_pfn = end_pfn;
+		high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+	}
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+		    struct mhp_params *params)
+{
+	int ret;
+
+	ret = __add_pages(nid, start_pfn, nr_pages, params);
+	WARN_ON_ONCE(ret);
+
+	/* update max_pfn, max_low_pfn and high_memory */
+	update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+				  nr_pages << PAGE_SHIFT);
+
+	return ret;
+}
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
 			  struct mhp_params *params)
 {
@@ -115,7 +145,7 @@  int __ref arch_add_memory(int nid, u64 start, u64 size,
 	rc = arch_create_linear_mapping(nid, start, size, params);
 	if (rc)
 		return rc;
-	rc = __add_pages(nid, start_pfn, nr_pages, params);
+	rc = add_pages(nid, start_pfn, nr_pages, params);
 	if (rc)
 		arch_remove_linear_mapping(start, size);
 	return rc;